xref: /spdk/lib/ftl/mngt/ftl_mngt_recovery.c (revision 18c8b52afa69f39481ebb75711b2f30b11693f9d)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 */

#include "spdk/bdev_module.h"

#include "ftl_nv_cache.h"
#include "ftl_core.h"
#include "ftl_utils.h"
#include "ftl_band.h"
#include "ftl_internal.h"
#include "ftl_l2p_cache.h"
#include "ftl_mngt.h"
#include "ftl_mngt_steps.h"
#include "utils/ftl_addr_utils.h"

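/*
 * Context of the dirty-shutdown recovery process. The full L2P may be too
 * large to rebuild in one pass under the configured DRAM limit, so recovery
 * operates on a movable window (the "L2P snippet"): a temporary metadata
 * buffer holding a slice of the L2P plus a parallel array of sequence IDs
 * used to pick the newest mapping for each LBA. The iter sub-struct tracks
 * which blocks and LBAs the current iteration covers.
 */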
struct ftl_mngt_recovery_ctx {
	/* Main recovery FTL management process */
	struct ftl_mngt_process *main;
	int status;
	TAILQ_HEAD(, ftl_band) open_bands;
	uint64_t open_bands_num;
	struct {
		struct ftl_layout_region region;
		struct ftl_md *md;
		uint64_t *l2p;
		uint64_t *seq_id;
		uint64_t count;
	} l2p_snippet;
	struct {
		uint64_t block_limit;
		uint64_t lba_first;
		uint64_t lba_last;
		uint32_t i;
	} iter;
	uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
};

static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
static const struct ftl_mngt_process_desc g_desc_recovery;
static const struct ftl_mngt_process_desc g_desc_recovery_shm;

static bool
recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	return 0 == ctx->l2p_snippet.region.current.blocks;
}

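/*
 * Moves the L2P snippet window forward by the number of blocks processed so
 * far. For illustration (the values below are assumptions, not defaults):
 * with FTL_BLOCK_SIZE = 4096 and addr_size = 4 there are 1024 L2P entries per
 * block, so with block_limit = 8 iteration 0 covers region blocks [0, 8) and
 * LBAs [0, 8192), iteration 1 covers blocks [8, 16) and LBAs [8192, 16384),
 * and so on. The last window is truncated at the region end and lba_last is
 * clamped to num_lbas.
 */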
static void
recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	struct ftl_layout_region *region, *snippet;
	uint64_t first_block, last_blocks;

	ctx->iter.i++;
	region = &dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	snippet = &ctx->l2p_snippet.region;

	/* Advance processed blocks */
	snippet->current.offset += snippet->current.blocks;
	snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
	snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);

	first_block = snippet->current.offset - region->current.offset;
	ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	last_blocks = first_block + snippet->current.blocks;
	ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	if (ctx->iter.lba_last > dev->num_lbas) {
		ctx->iter.lba_last = dev->num_lbas;
	}
}

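/*
 * Sizes the temporary recovery buffers. Each in-flight LBA costs addr_size
 * bytes for its L2P snippet entry plus sizeof(uint64_t) bytes for its
 * sequence ID, which is what the lba_limit division below expresses. A worked
 * example under assumed values (not defaults): with addr_size = 4,
 * l2p_dram_limit = 12 (MiB) and num_lbas = 16Mi, lba_limit = 12 MiB / 12 B =
 * 1Mi LBAs per iteration, l2p_limit = 4 MiB, the sequence IDs take 8 MiB, and
 * ceil(16Mi / 1Mi) = 16 iterations are needed.
 */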
static void
ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
	const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
	uint64_t l2p_limit_block, seq_limit_block, md_blocks;
	int md_flags;

	ctx->main = mngt;

	if (ftl_fast_recovery(dev)) {
		/* Fast recovery from shared memory does not need the temporary buffers */
		ftl_mngt_next_step(mngt);
		return;
	}

	/*
	 * The recovery process allocates temporary buffers. To stay within the memory
	 * limit, free the L2P metadata buffers if they exist; they will be recreated
	 * in the L2P initialization phase.
	 */
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));

	/* The values below are in bytes */
	mem_limit = dev->conf.l2p_dram_limit * MiB;
	mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
			     MiB) * MiB);

	lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
	l2p_limit = lba_limit * dev->layout.l2p.addr_size;
	iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);

	ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);

	/* Round to block size */
	ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;

	seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);

	FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
	FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
	FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
	FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
	dev->sb->ckpt_seq_id = 0;

	/* Initialize region */
	ctx->l2p_snippet.region = dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P];
	/* Limit the number of blocks in the region; this will be needed by ftl_md_set_region */
	ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;

	l2p_limit_block = ctx->iter.block_limit;
	seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);

	md_blocks = l2p_limit_block + seq_limit_block;
	md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;

	/* Initialize snippet of L2P metadata */
	ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
					    &ctx->l2p_snippet.region);
	if (!ctx->l2p_snippet.md) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);

	/* Initialize the recovery iterator. Calling it with blocks set to zero
	 * (i.e. zero blocks processed) makes it recalculate the offsets and
	 * starting LBA to their initial positions. */
	ctx->l2p_snippet.region.current.blocks = 0;
	recovery_iter_advance(dev, ctx);

	/* Initialize snippet of sequence IDs */
	ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
					       (l2p_limit_block * FTL_BLOCK_SIZE));

	TAILQ_INIT(&ctx->open_bands);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	ftl_md_destroy(ctx->l2p_snippet.md, 0);
	ctx->l2p_snippet.md = NULL;
	ctx->l2p_snippet.seq_id = NULL;

	ftl_mngt_next_step(mngt);
}

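/*
 * Completion of a single recovery iteration. Note the loop structure: the
 * "Recovery iterations" step below launches g_desc_recovery_iteration as a
 * subprocess and, on success, ftl_mngt_continue_step() re-executes that step,
 * which either starts the next iteration or moves on once the iterator
 * reports no blocks left.
 */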
static void
recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
{
	struct ftl_mngt_recovery_ctx *ctx = _ctx;

	recovery_iter_advance(dev, ctx);

	if (status) {
		ftl_mngt_fail_step(ctx->main);
	} else {
		ftl_mngt_continue_step(ctx->main);
	}
}

static void
ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	if (ftl_fast_recovery(dev)) {
		ftl_mngt_skip_step(mngt);
		return;
	}

	if (recovery_iter_done(dev, ctx)) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
	}
}

static void
restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;
	uint64_t num_bands = ftl_get_num_bands(dev);
	uint64_t i;

	if (status) {
		/* Restore error, end step */
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (i = 0; i < num_bands; i++) {
		band = &dev->bands[i];

		switch (band->md->state) {
		case FTL_BAND_STATE_FREE:
			ftl_band_initialize_free_state(band);
			break;
		case FTL_BAND_STATE_OPEN:
			TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
			TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
			break;
		case FTL_BAND_STATE_CLOSED:
			break;
		default:
			status = -EINVAL;
		}
	}

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];

	md->owner.cb_ctx = mngt;
	md->cb = restore_band_state_cb;
	ftl_md_restore(md);
}

struct band_md_ctx {
	int status;
	uint64_t qd;
	uint64_t id;
};

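/*
 * Walks all non-free bands and reads their tail (P2L) metadata at a high
 * queue depth. The step context tracks the number of reads in flight (qd) and
 * the next band to visit (id); the step is re-entered via
 * ftl_mngt_continue_step() until qd drains to zero and all bands have been
 * visited.
 */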
static void
ftl_mngt_recovery_walk_band_tail_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt,
				    ftl_band_md_cb cb)
{
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	uint64_t num_bands = ftl_get_num_bands(dev);

	/*
	 * This function generates a high queue depth and will utilize ftl_mngt_continue_step
	 * during completions to make sure all bands are processed before returning an error
	 * (if any were found) or continuing on.
	 */
	if (0 == sctx->qd && sctx->id == num_bands) {
		if (sctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	while (sctx->id < num_bands) {
		struct ftl_band *band = &dev->bands[sctx->id];

		if (FTL_BAND_STATE_FREE == band->md->state) {
			sctx->id++;
			continue;
		}

		if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
			/* This band is already open and has valid P2L map */
			sctx->id++;
			sctx->qd++;
			ftl_band_acquire_p2l_map(band);
			cb(band, mngt, FTL_MD_SUCCESS);
			continue;
		} else {
			if (dev->sb->ckpt_seq_id && (band->md->close_seq_id <= dev->sb->ckpt_seq_id)) {
				sctx->id++;
				continue;
			}

			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				/* No more free P2L map, try later */
				break;
			}
		}

		sctx->id++;
		ftl_band_read_tail_brq_md(band, cb, mngt);
		sctx->qd++;
	}

	if (0 == sctx->qd) {
		/*
		 * A queue depth of zero can happen when all the leftover bands are in the
		 * free state. To streamline error handling (since many bands read their P2L
		 * maps at the same time), ftl_mngt_continue_step is used to arrive back at
		 * the step-completion check at the beginning of this function.
		 */
		ftl_mngt_continue_step(mngt);
	}
}

static void
ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
	uint64_t *trim_map = ftl_md_get_buffer(md);
	uint64_t page_id, trim_seq_id;
	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t lba, lba_off;

	if (dev->sb->ckpt_seq_id) {
		FTL_ERRLOG(dev, "Checkpoint recovery not supported!\n");
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (lba = ctx->iter.lba_first; lba < ctx->iter.lba_last; lba++) {
		lba_off = lba - ctx->iter.lba_first;
		page_id = lba / lbas_in_page;

		assert(page_id < ftl_md_get_buffer_size(md) / sizeof(*trim_map));
		assert(page_id < dev->layout.region[FTL_LAYOUT_REGION_TYPE_L2P].current.blocks);
		assert(lba_off < ctx->l2p_snippet.count);

		trim_seq_id = trim_map[page_id];

		ctx->l2p_snippet.seq_id[lba_off] = trim_seq_id;
		ftl_addr_store(dev, ctx->l2p_snippet.l2p, lba_off, FTL_ADDR_INVALID);
	}

	ftl_mngt_next_step(mngt);
}

static void
l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;
	struct ftl_layout_region *region = &ctx->l2p_snippet.region;

	FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
	FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
		      region->current.offset, region->current.offset + region->current.blocks,
		      ctx->iter.lba_first, ctx->iter.lba_last);

	if (ftl_md_set_region(md, &ctx->l2p_snippet.region)) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_persist(md);
}

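/*
 * Applies one band's P2L map to the L2P snippet. For every physical block,
 * the (lba, seq_id) pair is compared against what has been recovered so far;
 * a mapping wins only if its sequence ID is not older than the one already
 * stored for that LBA. Stale conflicting entries in open/full bands are
 * invalidated in their P2L maps so that the later valid-map rebuild stays
 * consistent.
 */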
static void
restore_band_l2p_cb(struct ftl_band *band, void *cntx, enum ftl_md_status status)
{
	struct ftl_mngt_process *mngt = cntx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	struct spdk_ftl_dev *dev = band->dev;
	ftl_addr addr, curr_addr;
	uint64_t i, lba, seq_id, num_blks_in_band;
	uint32_t band_map_crc;
	int rc = 0;

	if (status != FTL_MD_SUCCESS) {
		FTL_ERRLOG(dev, "L2P band restore error, failed to read P2L map\n");
		rc = -EIO;
		goto cleanup;
	}

	band_map_crc = spdk_crc32c_update(band->p2l_map.band_map,
					  ftl_tail_md_num_blocks(band->dev) * FTL_BLOCK_SIZE, 0);

	/* P2L map is only valid if the band state is closed */
	if (FTL_BAND_STATE_CLOSED == band->md->state && band->md->p2l_map_checksum != band_map_crc) {
		FTL_ERRLOG(dev, "L2P band restore error, inconsistent P2L map CRC\n");
		rc = -EINVAL;
		goto cleanup;
	}

	num_blks_in_band = ftl_get_num_blocks_in_band(dev);
	for (i = 0; i < num_blks_in_band; ++i) {
		uint64_t lba_off;
		lba = band->p2l_map.band_map[i].lba;
		seq_id = band->p2l_map.band_map[i].seq_id;

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P band restore ERROR, LBA out of range\n");
			rc = -EINVAL;
			break;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {

			/* Overlapped band/chunk has newer data - invalidate P2L map on open/full band */
			if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
				addr = ftl_band_addr_from_block_offset(band, i);
				ftl_band_set_p2l(band, FTL_LBA_INVALID, addr, 0);
			}

			/* Newer data already recovered */
			continue;
		}

		addr = ftl_band_addr_from_block_offset(band, i);

		curr_addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		/* The entry being restored is newer - invalidate the overlapped, stale entry
		 * in a previously recovered open/full band */
		if (curr_addr != FTL_ADDR_INVALID && !ftl_addr_in_nvc(dev, curr_addr) && curr_addr != addr) {
			struct ftl_band *curr_band = ftl_band_from_addr(dev, curr_addr);

			if (FTL_BAND_STATE_OPEN == curr_band->md->state || FTL_BAND_STATE_FULL == curr_band->md->state) {
				size_t prev_offset = ftl_band_block_offset_from_addr(curr_band, curr_addr);
				if (curr_band->p2l_map.band_map[prev_offset].lba == lba &&
				    seq_id >= curr_band->p2l_map.band_map[prev_offset].seq_id) {
					ftl_band_set_p2l(curr_band, FTL_LBA_INVALID, curr_addr, 0);
				}
			}
		}

		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

cleanup:
	ftl_band_release_p2l_map(band);

	sctx->qd--;
	if (rc) {
		sctx->status = rc;
	}

	ftl_mngt_continue_step(mngt);
}

static void
ftl_mngt_recovery_iteration_restore_band_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_recovery_walk_band_tail_md(dev, mngt, restore_band_l2p_cb);
}

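/*
 * NV cache counterpart of the band restore above: replays a chunk's P2L map
 * into the L2P snippet, again resolving conflicts by sequence ID. Invoked for
 * every chunk via ftl_mngt_nv_cache_restore_l2p().
 */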
static int
restore_chunk_l2p_cb(struct ftl_nv_cache_chunk *chunk, void *ctx)
{
	struct ftl_mngt_recovery_ctx *pctx = ctx;
	struct spdk_ftl_dev *dev;
	struct ftl_nv_cache *nv_cache = chunk->nv_cache;
	ftl_addr addr;
	const uint64_t seq_id = chunk->md->seq_id;
	uint64_t i, lba;
	uint32_t chunk_map_crc;

	dev = SPDK_CONTAINEROF(chunk->nv_cache, struct spdk_ftl_dev, nv_cache);

	chunk_map_crc = spdk_crc32c_update(chunk->p2l_map.chunk_map,
					   ftl_nv_cache_chunk_tail_md_num_blocks(chunk->nv_cache) * FTL_BLOCK_SIZE, 0);
	if (chunk->md->p2l_map_checksum != chunk_map_crc) {
		return -1;
	}

	for (i = 0; i < nv_cache->chunk_blocks; ++i) {
		uint64_t lba_off;

		lba = ftl_chunk_map_get_lba(chunk, i);

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P Chunk restore ERROR, LBA out of range\n");
			return -1;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
			/* Newer data already recovered */
			continue;
		}

		addr = ftl_addr_from_nvc_offset(dev, chunk->offset + i);
		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

	return 0;
}

static void
ftl_mngt_recovery_iteration_restore_chunk_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_nv_cache_restore_l2p(dev, mngt, restore_chunk_l2p_cb, ftl_mngt_get_caller_ctx(mngt));
}

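/*
 * Once the snippet holds the winning address for each LBA in the current
 * range, mark those addresses in the device-wide valid map and bump the
 * per-band valid block counters. Each physical address can win for at most
 * one LBA, hence the assert on an already-set bit.
 */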
static void
ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	uint64_t lba, lba_off;
	ftl_addr addr;

	for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
		lba_off = lba - pctx->iter.lba_first;
		addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		if (addr == FTL_ADDR_INVALID) {
			continue;
		}

		if (!ftl_addr_in_nvc(dev, addr)) {
			struct ftl_band *band = ftl_band_from_addr(dev, addr);
			band->p2l_map.num_valid++;
		}

		if (ftl_bitmap_get(dev->valid_map, addr)) {
			assert(false);
			ftl_mngt_fail_step(mngt);
			return;
		} else {
			ftl_bitmap_set(dev->valid_map, addr);
		}
	}

	ftl_mngt_next_step(mngt);
}

static void
p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
		pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
		FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
	}
}

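/*
 * Rebuilds an open band's P2L map from the P2L checkpoint region whose
 * recorded sequence ID matches the band's. If no checkpoint matches, the band
 * was opened but holds no valid blocks, so its map stays all-invalid and the
 * write pointer is reset.
 */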
static int
p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	memset(band->p2l_map.band_map, -1,
	       FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
		if (seq_id == band->md->seq) {
			FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
				      PRIu64"\n", band->id, ckpt_id, seq_id);
			return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
		}
	}

	/* The band was opened but has no valid blocks; reset the write pointer to 0 */
	ftl_band_iter_init(band);
	FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
		      " match any P2L checkpoint\n", band->id, band->md->seq);
	return 0;
}

static void
ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);

	p2l_ckpt_preprocess(dev, pctx);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_recover_max_seq(dev);
	ftl_mngt_next_step(mngt);
}

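/*
 * Recovers each band left open at dirty shutdown: allocate a P2L map, restore
 * it from the matching checkpoint and, if needed, re-acquire the checkpoint
 * region so checkpointing can continue. The step processes one band per
 * invocation and re-enters itself via ftl_mngt_continue_step() until the open
 * list is empty.
 */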
static void
ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;

	if (TAILQ_EMPTY(&pctx->open_bands)) {
		FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
		if (pctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	if (!ftl_mngt_get_step_ctx(mngt)) {
		ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));

		/* The step is called for the first time, initialize it */
		TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate P2L map\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (p2l_ckpt_restore_p2l(pctx, band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (!band->p2l_map.p2l_ckpt) {
				band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
				if (!band->p2l_map.p2l_ckpt) {
					FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
					ftl_mngt_fail_step(mngt);
					return;
				}
			}
		}
	}

	band = TAILQ_FIRST(&pctx->open_bands);

	if (ftl_band_filled(band, band->md->iter.offset)) {
		band->md->state = FTL_BAND_STATE_FULL;
	}

	/* In the next step (finalize band initialization) this band will
	 * be assigned to a writer, so temporarily move it to the closed
	 * list; the next step will pick it up from there.
	 */
	TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
	TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);

	FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
		      band->id, band->md->seq, band->md->iter.offset);

	ftl_mngt_continue_step(mngt);
}

static void
ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_valid_map_load_state(dev);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_complete_unmap_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;

	dev->sb_shm->trim.in_progress = false;

	if (!status) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_fail_step(mngt);
	}
}

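/*
 * If a trim (unmap) transaction was interrupted by the dirty shutdown, the
 * shared-memory superblock still describes it; replay it into the trim map
 * before persisting the trim metadata region.
 */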
static void
ftl_mngt_complete_unmap(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	uint64_t start_lba, num_blocks, seq_id;
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	if (dev->sb_shm->trim.in_progress) {
		start_lba = dev->sb_shm->trim.start_lba;
		num_blocks = dev->sb_shm->trim.num_blocks;
		seq_id = dev->sb_shm->trim.seq_id;

		assert(seq_id <= dev->sb->seq_id);

		FTL_NOTICELOG(dev, "Incomplete unmap detected lba: %"PRIu64" num_blocks: %"PRIu64"\n",
			      start_lba, num_blocks);

		ftl_set_unmap_map(dev, start_lba, num_blocks, seq_id);
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_complete_unmap_cb;

	ftl_md_persist(md);
}

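/*
 * Rebuilds the in-memory trim map from the persisted trim metadata. Each
 * metadata block's VSS entry describes one trim transaction (start LBA, block
 * count, sequence ID), both of which must be aligned to a whole L2P page. For
 * every L2P page covered by a transaction, the page's entry keeps the highest
 * sequence ID seen, which later decides whether the trim or the band/chunk
 * data is newer for the LBAs in that page.
 */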
static void
ftl_mngt_recover_unmap_map_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	uint64_t num_md_blocks, first_page, num_pages;
	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t *page = ftl_md_get_buffer(md);
	union ftl_md_vss *page_vss = ftl_md_get_vss_buffer(md);
	uint64_t lba, num_blocks, vss_seq_id;
	size_t i, j;

	if (status) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	/* The buffer size is in bytes; one VSS entry describes one FTL_BLOCK_SIZE metadata block */
	num_md_blocks = ftl_md_get_buffer_size(md) / FTL_BLOCK_SIZE;

	for (i = 0; i < num_md_blocks; ++i, page_vss++) {
		lba = page_vss->unmap.start_lba;
		num_blocks = page_vss->unmap.num_blocks;
		vss_seq_id = page_vss->unmap.seq_id;

		first_page = lba / lbas_in_page;
		num_pages = num_blocks / lbas_in_page;

		if (lba % lbas_in_page || num_blocks % lbas_in_page) {
			ftl_mngt_fail_step(mngt);
			return;
		}

		for (j = first_page; j < first_page + num_pages; ++j) {
			page[j] = spdk_max(vss_seq_id, page[j]);
		}
	}

	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_unmap_map(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	if (ftl_fast_recovery(dev)) {
		FTL_DEBUGLOG(dev, "SHM: skipping unmap map recovery\n");
		ftl_mngt_next_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_unmap_map_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	if (ftl_fast_recovery(dev)) {
		ftl_mngt_call_process(mngt, &g_desc_recovery_shm);
	} else {
		ftl_mngt_skip_step(mngt);
	}
}

/*
 * During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
 * recreating it all at once may take up too much DRAM, so it's done in multiple
 * iterations. This process describes the recovery of one part of the L2P in a single
 * iteration.
 */
static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
	.name = "FTL recovery iteration",
	.steps = {
		{
			.name = "Load L2P",
			.action = ftl_mngt_recovery_iteration_load_l2p,
		},
		{
			.name = "Initialize sequence IDs",
			.action = ftl_mngt_recovery_iteration_init_seq_ids,
		},
		{
			.name = "Restore chunk L2P",
			.action = ftl_mngt_recovery_iteration_restore_chunk_l2p,
		},
		{
			.name = "Restore band L2P",
			.ctx_size = sizeof(struct band_md_ctx),
			.action = ftl_mngt_recovery_iteration_restore_band_l2p,
		},
		{
			.name = "Restore valid map",
			.action = ftl_mngt_recovery_iteration_restore_valid_map,
		},
		{
			.name = "Save L2P",
			.action = ftl_mngt_recovery_iteration_save_l2p,
		},
		{}
	}
};

/*
 * Loading of FTL after dirty shutdown. Recovers metadata and the L2P, and decides on
 * the number of recovery iterations to execute (dependent on the ratio of the L2P
 * cache size to the total L2P size).
 */
static const struct ftl_mngt_process_desc g_desc_recovery = {
	.name = "FTL recovery",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Initialize recovery",
			.action = ftl_mngt_recovery_init,
			.cleanup = ftl_mngt_recovery_deinit
		},
		{
			.name = "Recover band state",
			.action = ftl_mngt_recovery_restore_band_state,
		},
		{
			.name = "Initialize P2L checkpointing",
			.action = ftl_mngt_p2l_init_ckpt,
			.cleanup = ftl_mngt_p2l_deinit_ckpt
		},
		{
			.name = "Restore P2L checkpoints",
			.action = ftl_mngt_p2l_restore_ckpt
		},
		{
			.name = "Preprocess P2L checkpoints",
			.action = ftl_mngt_recovery_pre_process_p2l
		},
		{
			.name = "Recover open bands P2L",
			.action = ftl_mngt_recovery_open_bands_p2l
		},
		{
			.name = "Recover chunk state",
			.action = ftl_mngt_nv_cache_restore_chunk_state
		},
		{
			.name = "Recover max seq ID",
			.action = ftl_mngt_recover_seq_id
		},
		{
			.name = "Recover unmap map",
			.action = ftl_mngt_recover_unmap_map
		},
		{
			.name = "Recover open chunks P2L",
			.action = ftl_mngt_nv_cache_recover_open_chunk
		},
		{
			.name = "Recovery iterations",
			.action = ftl_mngt_recovery_run_iteration,
		},
		{
			.name = "Deinitialize recovery",
			.action = ftl_mngt_recovery_deinit
		},
		{
			.name = "Initialize L2P",
			.action = ftl_mngt_init_l2p,
			.cleanup = ftl_mngt_deinit_l2p
		},
		{
			.name = "Recover L2P from shared memory",
			.action = ftl_mngt_recovery_shm_l2p,
		},
		{
			.name = "Finalize band initialization",
			.action = ftl_mngt_finalize_init_bands,
		},
		{
			.name = "Free P2L region bufs",
			.action = ftl_mngt_p2l_free_bufs,
		},
		{
			.name = "Start core poller",
			.action = ftl_mngt_start_core_poller,
			.cleanup = ftl_mngt_stop_core_poller
		},
		{
			.name = "Self test on startup",
			.action = ftl_mngt_self_test
		},
		{
			.name = "Finalize initialization",
			.action = ftl_mngt_finalize_startup,
		},
		{}
	}
};

/*
 * Shared memory specific steps for dirty shutdown recovery - main task is rebuilding the state of
 * L2P cache (paged in/out status, dirtiness etc. of individual pages).
 */
static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
	.name = "FTL recovery from SHM",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Restore L2P from SHM",
			.action = ftl_mngt_restore_l2p,
		},
		{
			.name = "Restore valid maps counters",
			.action = ftl_mngt_restore_valid_counters,
		},
		{
			.name = "Complete unmap transaction",
			.action = ftl_mngt_complete_unmap,
		},
		{}
	}
};

void
ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_mngt_call_process(mngt, &g_desc_recovery);
}
978