/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "spdk/bdev_module.h"

#include "ftl_nv_cache.h"
#include "ftl_core.h"
#include "ftl_utils.h"
#include "ftl_band.h"
#include "ftl_internal.h"
#include "ftl_l2p_cache.h"
#include "ftl_mngt.h"
#include "ftl_mngt_steps.h"
#include "utils/ftl_addr_utils.h"

struct ftl_mngt_recovery_ctx {
	/* Main recovery FTL management process */
	struct ftl_mngt_process *main;
	int status;
	TAILQ_HEAD(, ftl_band) open_bands;
	uint64_t open_bands_num;
	struct {
		struct ftl_layout_region region;
		struct ftl_md *md;
		uint64_t *l2p;
		uint64_t *seq_id;
		uint64_t count;
	} l2p_snippet;
	struct {
		uint64_t block_limit;
		uint64_t lba_first;
		uint64_t lba_last;
		uint32_t i;
	} iter;
	uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
};

static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
static const struct ftl_mngt_process_desc g_desc_recovery;
static const struct ftl_mngt_process_desc g_desc_recovery_shm;

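/* The iteration is done once the L2P snippet window no longer covers any L2P blocks */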
static bool
recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	return 0 == ctx->l2p_snippet.region.current.blocks;
}

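/*
 * Advance the L2P snippet window by the number of blocks processed in the previous
 * iteration and recompute the LBA range ([lba_first, lba_last)) covered by the new window.
 */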
static void
recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	struct ftl_layout_region *region, *snippet;
	uint64_t first_block, last_blocks;

	ctx->iter.i++;
	region = &dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	snippet = &ctx->l2p_snippet.region;

	/* Advance processed blocks */
	snippet->current.offset += snippet->current.blocks;
	snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
	snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);

	first_block = snippet->current.offset - region->current.offset;
	ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	last_blocks = first_block + snippet->current.blocks;
	ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	if (ctx->iter.lba_last > dev->num_lbas) {
		ctx->iter.lba_last = dev->num_lbas;
	}
}

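/*
 * Prepare the recovery context: free the existing L2P cache buffers to stay within the
 * configured DRAM limit, size the per-iteration window and allocate the shared-memory
 * buffer holding the L2P snippet together with the per-LBA sequence IDs.
 */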
static void
ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
	const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
	uint64_t l2p_limit_block, seq_limit_block, md_blocks;
	int md_flags;

	ctx->main = mngt;

	if (ftl_fast_recovery(dev)) {
		/* Shared memory fast recovery doesn't need the temporary buffers */
		ftl_mngt_next_step(mngt);
		return;
	}

	/*
	 * The recovery process allocates temporary buffers. To stay within the memory limit,
	 * free the L2P metadata buffers if they exist; they will be recreated in the L2P
	 * initialization phase.
	 */
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));

	/* Below values are in byte unit */
	mem_limit = dev->conf.l2p_dram_limit * MiB;
	mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
			     MiB) * MiB);

	lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
	l2p_limit = lba_limit * dev->layout.l2p.addr_size;
	iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);

	ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);

	/* Round to block size */
	ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;

	seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);

	FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
	FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
	FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
	FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
	dev->sb->ckpt_seq_id = 0;

	/* Initialize region */
	ctx->l2p_snippet.region = dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	/* Limit blocks in region, it will be needed for ftl_md_set_region */
	ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;

	l2p_limit_block = ctx->iter.block_limit;
	seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);

	md_blocks = l2p_limit_block + seq_limit_block;
	md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;

	/* Initialize snippet of L2P metadata */
	ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
					    &ctx->l2p_snippet.region);
	if (!ctx->l2p_snippet.md) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);

	/*
	 * Initialize the recovery iterator. Calling it with blocks set to zero (i.e. zero
	 * blocks processed so far) makes it recalculate the offsets and the starting LBA
	 * back to their initial positions.
	 */
	ctx->l2p_snippet.region.current.blocks = 0;
	recovery_iter_advance(dev, ctx);

	/* Initialize snippet of sequence IDs */
	ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
					       (l2p_limit_block * FTL_BLOCK_SIZE));

	TAILQ_INIT(&ctx->open_bands);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	ftl_md_destroy(ctx->l2p_snippet.md, 0);
	ctx->l2p_snippet.md = NULL;
	ctx->l2p_snippet.seq_id = NULL;

	ftl_mngt_next_step(mngt);
}

static void
recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
{
	struct ftl_mngt_recovery_ctx *ctx = _ctx;

	recovery_iter_advance(dev, ctx);

	if (status) {
		ftl_mngt_fail_step(ctx->main);
	} else {
		ftl_mngt_continue_step(ctx->main);
	}
}

static void
ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	if (ftl_fast_recovery(dev)) {
		ftl_mngt_skip_step(mngt);
		return;
	}

	if (recovery_iter_done(dev, ctx)) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
	}
}

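/*
 * Completion callback for band info metadata restore: sort the bands into the free,
 * open and closed sets based on their persisted state.
 */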
static void
restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;
	uint64_t num_bands = ftl_get_num_bands(dev);
	uint64_t i;

	if (status) {
		/* Restore error, end step */
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (i = 0; i < num_bands; i++) {
		band = &dev->bands[i];

		switch (band->md->state) {
		case FTL_BAND_STATE_FREE:
			ftl_band_initialize_free_state(band);
			break;
		case FTL_BAND_STATE_OPEN:
			TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
			TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
			break;
		case FTL_BAND_STATE_CLOSED:
			break;
		default:
			status = -EINVAL;
		}
	}

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];

	md->owner.cb_ctx = mngt;
	md->cb = restore_band_state_cb;
	ftl_md_restore(md);
}

struct band_md_ctx {
	int status;
	uint64_t qd;
	uint64_t id;
};

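/*
 * Walk all non-free bands, read their tail (P2L) metadata and invoke the given callback
 * for each of them. The number of outstanding reads is bounded by the pool of free P2L
 * map buffers; the walk resumes from sctx->id when the step is re-entered.
 */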
static void
ftl_mngt_recovery_walk_band_tail_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt,
				    ftl_band_md_cb cb)
{
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	uint64_t num_bands = ftl_get_num_bands(dev);

	/*
	 * This function generates a high queue depth and relies on ftl_mngt_continue_step
	 * during completions to make sure all bands are processed before returning an error
	 * (if any was found) or continuing on.
	 */
	if (0 == sctx->qd && sctx->id == num_bands) {
		if (sctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	while (sctx->id < num_bands) {
		struct ftl_band *band = &dev->bands[sctx->id];

		if (FTL_BAND_STATE_FREE == band->md->state) {
			sctx->id++;
			continue;
		}

		if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
			/* This band is already open and has a valid P2L map */
			sctx->id++;
			sctx->qd++;
			ftl_band_acquire_p2l_map(band);
			cb(band, mngt, FTL_MD_SUCCESS);
			continue;
		} else {
			if (dev->sb->ckpt_seq_id && (band->md->close_seq_id <= dev->sb->ckpt_seq_id)) {
				sctx->id++;
				continue;
			}

			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				/* No free P2L maps left, try again later */
				break;
			}
		}

		sctx->id++;
		ftl_band_read_tail_brq_md(band, cb, mngt);
		sctx->qd++;
	}

	if (0 == sctx->qd) {
		/*
		 * A queue depth of zero can happen when all the leftover bands are in the free
		 * state. To streamline error handling (since many bands read their P2L maps at
		 * the same time), use ftl_mngt_continue_step to arrive at the same step-end
		 * check at the beginning of this function.
		 */
		ftl_mngt_continue_step(mngt);
	}
}

static void
ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
	uint64_t *trim_map = ftl_md_get_buffer(md);
	uint64_t page_id, trim_seq_id;
	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t lba, lba_off;

	if (dev->sb->ckpt_seq_id) {
		FTL_ERRLOG(dev, "Checkpoint recovery not supported!\n");
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (lba = ctx->iter.lba_first; lba < ctx->iter.lba_last; lba++) {
		lba_off = lba - ctx->iter.lba_first;
		page_id = lba / lbas_in_page;

		assert(page_id < ftl_md_get_buffer_size(md) / sizeof(*trim_map));
		assert(page_id < dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P].current.blocks);
		assert(lba_off < ctx->l2p_snippet.count);

		trim_seq_id = trim_map[page_id];

		ctx->l2p_snippet.seq_id[lba_off] = trim_seq_id;
		ftl_addr_store(dev, ctx->l2p_snippet.l2p, lba_off, FTL_ADDR_INVALID);
	}

	ftl_mngt_next_step(mngt);
}

static void
l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;
	struct ftl_layout_region *region = &ctx->l2p_snippet.region;

	FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
	FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
		      region->current.offset, region->current.offset + region->current.blocks,
		      ctx->iter.lba_first, ctx->iter.lba_last);

	if (ftl_md_set_region(md, &ctx->l2p_snippet.region)) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_persist(md);
}

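/*
 * Per-band tail metadata callback: walk the band's P2L map and, for every LBA that falls
 * into the current iteration window, update the L2P snippet entry if this band holds the
 * newest copy of the data (based on the per-LBA sequence IDs gathered so far).
 */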
static void
restore_band_l2p_cb(struct ftl_band *band, void *cntx, enum ftl_md_status status)
{
	struct ftl_mngt_process *mngt = cntx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	struct spdk_ftl_dev *dev = band->dev;
	ftl_addr addr, curr_addr;
	uint64_t i, lba, seq_id, num_blks_in_band;
	uint32_t band_map_crc;
	int rc = 0;

	if (status != FTL_MD_SUCCESS) {
		FTL_ERRLOG(dev, "L2P band restore error, failed to read P2L map\n");
		rc = -EIO;
		goto cleanup;
	}

	band_map_crc = spdk_crc32c_update(band->p2l_map.band_map,
					  ftl_tail_md_num_blocks(band->dev) * FTL_BLOCK_SIZE, 0);

	/* The P2L map checksum is only meaningful for closed bands */
	if (FTL_BAND_STATE_CLOSED == band->md->state && band->md->p2l_map_checksum != band_map_crc) {
		FTL_ERRLOG(dev, "L2P band restore error, inconsistent P2L map CRC\n");
		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_BASE);
		rc = -EINVAL;
		goto cleanup;
	}

	num_blks_in_band = ftl_get_num_blocks_in_band(dev);
	for (i = 0; i < num_blks_in_band; ++i) {
		uint64_t lba_off;
		lba = band->p2l_map.band_map[i].lba;
		seq_id = band->p2l_map.band_map[i].seq_id;

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P band restore ERROR, LBA out of range\n");
			rc = -EINVAL;
			break;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {

			/* An overlapping band/chunk holds newer data - invalidate the stale P2L entry on an open/full band */
			if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
				addr = ftl_band_addr_from_block_offset(band, i);
				ftl_band_set_p2l(band, FTL_LBA_INVALID, addr, 0);
			}

			/* Newer data already recovered */
			continue;
		}

		addr = ftl_band_addr_from_block_offset(band, i);

		curr_addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		/* This band holds newer data - invalidate the previously recovered location if it sits on an open/full band */
		if (curr_addr != FTL_ADDR_INVALID && !ftl_addr_in_nvc(dev, curr_addr) && curr_addr != addr) {
			struct ftl_band *curr_band = ftl_band_from_addr(dev, curr_addr);

			if (FTL_BAND_STATE_OPEN == curr_band->md->state || FTL_BAND_STATE_FULL == curr_band->md->state) {
				size_t prev_offset = ftl_band_block_offset_from_addr(curr_band, curr_addr);
				if (curr_band->p2l_map.band_map[prev_offset].lba == lba &&
				    seq_id >= curr_band->p2l_map.band_map[prev_offset].seq_id) {
					ftl_band_set_p2l(curr_band, FTL_LBA_INVALID, curr_addr, 0);
				}
			}
		}

		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

cleanup:
	ftl_band_release_p2l_map(band);

	sctx->qd--;
	if (rc) {
		sctx->status = rc;
	}

	ftl_mngt_continue_step(mngt);
}

static void
ftl_mngt_recovery_iteration_restore_band_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_recovery_walk_band_tail_md(dev, mngt, restore_band_l2p_cb);
}

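/*
 * Per-chunk callback for NV cache L2P restore: verify the chunk P2L map checksum and
 * update the L2P snippet entries for LBAs within the current iteration window whenever
 * the chunk holds data at least as new as what has been recovered so far.
 */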
static int
restore_chunk_l2p_cb(struct ftl_nv_cache_chunk *chunk, void *ctx)
{
	struct ftl_mngt_recovery_ctx *pctx = ctx;
	struct spdk_ftl_dev *dev;
	struct ftl_nv_cache *nv_cache = chunk->nv_cache;
	ftl_addr addr;
	const uint64_t seq_id = chunk->md->seq_id;
	uint64_t i, lba;
	uint32_t chunk_map_crc;

	dev = SPDK_CONTAINEROF(chunk->nv_cache, struct spdk_ftl_dev, nv_cache);

	chunk_map_crc = spdk_crc32c_update(chunk->p2l_map.chunk_map,
					   ftl_nv_cache_chunk_tail_md_num_blocks(chunk->nv_cache) * FTL_BLOCK_SIZE, 0);
	if (chunk->md->p2l_map_checksum != chunk_map_crc) {
		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_NV_CACHE);
		return -1;
	}

	for (i = 0; i < nv_cache->chunk_blocks; ++i) {
		uint64_t lba_off;

		lba = ftl_chunk_map_get_lba(chunk, i);

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P Chunk restore ERROR, LBA out of range\n");
			return -1;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
			/* Newer data already recovered */
			continue;
		}

		addr = ftl_addr_from_nvc_offset(dev, chunk->offset + i);
		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

	return 0;
}

static void
ftl_mngt_recovery_iteration_restore_chunk_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_nv_cache_restore_l2p(dev, mngt, restore_chunk_l2p_cb, ftl_mngt_get_caller_ctx(mngt));
}

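/*
 * Mark the physical addresses recovered in this iteration in the device valid map and
 * bump the per-band valid block counters for addresses residing on bands.
 */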
static void
ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	uint64_t lba, lba_off;
	ftl_addr addr;

	for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
		lba_off = lba - pctx->iter.lba_first;
		addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		if (addr == FTL_ADDR_INVALID) {
			continue;
		}

		if (!ftl_addr_in_nvc(dev, addr)) {
			struct ftl_band *band = ftl_band_from_addr(dev, addr);
			band->p2l_map.num_valid++;
		}

		if (ftl_bitmap_get(dev->valid_map, addr)) {
			assert(false);
			ftl_mngt_fail_step(mngt);
			return;
		} else {
			ftl_bitmap_set(dev->valid_map, addr);
		}
	}

	ftl_mngt_next_step(mngt);
}

static void
p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
		pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
		FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
	}
}

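/*
 * Restore the P2L map of an open band from the P2L checkpoint whose sequence ID matches
 * the band's sequence ID. If no checkpoint matches, the band contains no valid blocks
 * and its write pointer is reset to the beginning of the band.
 */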
static int
p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	memset(band->p2l_map.band_map, -1,
	       FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
		if (seq_id == band->md->seq) {
			FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
				      PRIu64"\n", band->id, ckpt_id, seq_id);
			return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
		}
	}

	/* Band opened but no valid blocks within it, set write pointer to 0 */
	ftl_band_iter_init(band);
	FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
		      " match any P2L checkpoint\n", band->id, band->md->seq);
	return 0;
}

static void
ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);

	p2l_ckpt_preprocess(dev, pctx);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_recover_max_seq(dev);
	ftl_mngt_next_step(mngt);
}

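/*
 * Recover the P2L maps of all bands left open at dirty shutdown. On the first invocation
 * every open band gets a P2L map buffer restored from its checkpoint; afterwards the step
 * re-enters once per band, moving it to the shut bands list for later writer assignment.
 */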
static void
ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;

	if (TAILQ_EMPTY(&pctx->open_bands)) {
		FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
		if (pctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	if (!ftl_mngt_get_step_ctx(mngt)) {
		ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));

		/* Step called for the first time, initialize */
		TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate P2L map\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (p2l_ckpt_restore_p2l(pctx, band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (!band->p2l_map.p2l_ckpt) {
				band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
				if (!band->p2l_map.p2l_ckpt) {
					FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
					ftl_mngt_fail_step(mngt);
					return;
				}
			}
		}
	}

	band = TAILQ_FIRST(&pctx->open_bands);

	if (ftl_band_filled(band, band->md->iter.offset)) {
		band->md->state = FTL_BAND_STATE_FULL;
	}

	/* In the next step (finalize band initialization) this band will be assigned
	 * to a writer, so temporarily move it to the closed bands list; the next step
	 * will pick it up from there and hand it over to the writer.
	 */
	TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
	TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);

	FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
		      band->id, band->md->seq, band->md->iter.offset);

	ftl_mngt_continue_step(mngt);
}


static void
ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_valid_map_load_state(dev);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_complete_unmap_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;

	dev->sb_shm->trim.in_progress = false;

	if (!status) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_fail_step(mngt);
	}
}

static void
ftl_mngt_complete_unmap(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	uint64_t start_lba, num_blocks, seq_id;
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	if (dev->sb_shm->trim.in_progress) {
		start_lba = dev->sb_shm->trim.start_lba;
		num_blocks = dev->sb_shm->trim.num_blocks;
		seq_id = dev->sb_shm->trim.seq_id;

		assert(seq_id <= dev->sb->seq_id);

		FTL_NOTICELOG(dev, "Incomplete unmap detected lba: %"PRIu64" num_blocks: %"PRIu64"\n",
			      start_lba, num_blocks);

		ftl_set_unmap_map(dev, start_lba, num_blocks, seq_id);
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_complete_unmap_cb;

	ftl_md_persist(md);
}

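/*
 * Completion callback for the trim metadata restore: rebuild the per-page trim map from
 * the per-block VSS entries, keeping the highest sequence ID recorded for each L2P page.
 */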
static void
ftl_mngt_recover_unmap_map_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	uint64_t num_md_blocks, first_page, num_pages;
	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t *page = ftl_md_get_buffer(md);
	union ftl_md_vss *page_vss = ftl_md_get_vss_buffer(md);
	uint64_t lba, num_blocks, vss_seq_id;
	size_t i, j;

	if (status) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	num_md_blocks = ftl_md_get_buffer_size(md) / FTL_BLOCK_SIZE;

	for (i = 0; i < num_md_blocks; ++i, page_vss++) {
		lba = page_vss->unmap.start_lba;
		num_blocks = page_vss->unmap.num_blocks;
		vss_seq_id = page_vss->unmap.seq_id;

		first_page = lba / lbas_in_page;
		num_pages = num_blocks / lbas_in_page;

		if (lba % lbas_in_page || num_blocks % lbas_in_page) {
			ftl_mngt_fail_step(mngt);
			return;
		}

		for (j = first_page; j < first_page + num_pages; ++j) {
			page[j] = spdk_max(vss_seq_id, page[j]);
		}
	}

	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_unmap_map(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	if (ftl_fast_recovery(dev)) {
		FTL_DEBUGLOG(dev, "SHM: skipping unmap map recovery\n");
		ftl_mngt_next_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_unmap_map_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	if (ftl_fast_recovery(dev)) {
		ftl_mngt_call_process(mngt, &g_desc_recovery_shm);
	} else {
		ftl_mngt_skip_step(mngt);
	}
}


/*
 * During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
 * recreating it all at once could take up too much DRAM, so it's done in multiple
 * iterations. This process describes the recovery of one part of the L2P in a single
 * iteration.
 */
static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
	.name = "FTL recovery iteration",
	.steps = {
		{
			.name = "Load L2P",
			.action = ftl_mngt_recovery_iteration_load_l2p,
		},
		{
			.name = "Initialize sequence IDs",
			.action = ftl_mngt_recovery_iteration_init_seq_ids,
		},
		{
			.name = "Restore chunk L2P",
			.action = ftl_mngt_recovery_iteration_restore_chunk_l2p,
		},
		{
			.name = "Restore band L2P",
			.ctx_size = sizeof(struct band_md_ctx),
			.action = ftl_mngt_recovery_iteration_restore_band_l2p,
		},
		{
			.name = "Restore valid map",
			.action = ftl_mngt_recovery_iteration_restore_valid_map,
		},
		{
			.name = "Save L2P",
			.action = ftl_mngt_recovery_iteration_save_l2p,
		},
		{}
	}
};


/*
 * Loading of the FTL after a dirty shutdown. Recovers metadata and the L2P, and decides
 * on the number of recovery iterations to execute (depending on the ratio of the L2P
 * cache size to the total L2P size).
 */
static const struct ftl_mngt_process_desc g_desc_recovery = {
	.name = "FTL recovery",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Initialize recovery",
			.action = ftl_mngt_recovery_init,
			.cleanup = ftl_mngt_recovery_deinit
		},
		{
			.name = "Recover band state",
			.action = ftl_mngt_recovery_restore_band_state,
		},
		{
			.name = "Initialize P2L checkpointing",
			.action = ftl_mngt_p2l_init_ckpt,
			.cleanup = ftl_mngt_p2l_deinit_ckpt
		},
		{
			.name = "Restore P2L checkpoints",
			.action = ftl_mngt_p2l_restore_ckpt
		},
		{
			.name = "Preprocess P2L checkpoints",
			.action = ftl_mngt_recovery_pre_process_p2l
		},
		{
			.name = "Recover open bands P2L",
			.action = ftl_mngt_recovery_open_bands_p2l
		},
		{
			.name = "Recover chunk state",
			.action = ftl_mngt_nv_cache_restore_chunk_state
		},
		{
			.name = "Recover max seq ID",
			.action = ftl_mngt_recover_seq_id
		},
		{
			.name = "Recover unmap map",
			.action = ftl_mngt_recover_unmap_map
		},
		{
			.name = "Recover open chunks P2L",
			.action = ftl_mngt_nv_cache_recover_open_chunk
		},
		{
			.name = "Recovery iterations",
			.action = ftl_mngt_recovery_run_iteration,
		},
		{
			.name = "Deinitialize recovery",
			.action = ftl_mngt_recovery_deinit
		},
		{
			.name = "Initialize L2P",
			.action = ftl_mngt_init_l2p,
			.cleanup = ftl_mngt_deinit_l2p
		},
		{
			.name = "Recover L2P from shared memory",
			.action = ftl_mngt_recovery_shm_l2p,
		},
		{
			.name = "Finalize band initialization",
			.action = ftl_mngt_finalize_init_bands,
		},
		{
			.name = "Free P2L region bufs",
			.action = ftl_mngt_p2l_free_bufs,
		},
		{
			.name = "Start core poller",
			.action = ftl_mngt_start_core_poller,
			.cleanup = ftl_mngt_stop_core_poller
		},
		{
			.name = "Self test on startup",
			.action = ftl_mngt_self_test
		},
		{
			.name = "Finalize initialization",
			.action = ftl_mngt_finalize_startup,
		},
		{}
	}
};


/*
 * Shared memory specific steps for dirty shutdown recovery - the main task is rebuilding the
 * state of the L2P cache (paged in/out status, dirtiness etc. of individual pages).
 */
static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
	.name = "FTL recovery from SHM",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Restore L2P from SHM",
			.action = ftl_mngt_restore_l2p,
		},
		{
			.name = "Restore valid maps counters",
			.action = ftl_mngt_restore_valid_counters,
		},
		{
			.name = "Complete unmap transaction",
			.action = ftl_mngt_complete_unmap,
		},
		{}
	}
};

void
ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_mngt_call_process(mngt, &g_desc_recovery);
}