xref: /spdk/lib/ftl/utils/ftl_md.c (revision 95d6c9fac17572b107042103439aafd696d60b0e)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright 2023 Solidigm All Rights Reserved
3  *   Copyright (C) 2022 Intel Corporation.
4  *   All rights reserved.
5  */
6 
7 #include "spdk/env.h"
8 #include "spdk/bdev_module.h"
9 
10 #include "ftl_core.h"
11 #include "ftl_md.h"
12 #include "ftl_nv_cache_io.h"
13 
14 struct ftl_md;
15 static void io_submit(struct ftl_md *md);
16 static void io_done(struct ftl_md *md);
17 
18 static bool
19 has_mirror(struct ftl_md *md)
20 {
21 	if (md->region) {
22 		if (md->region->mirror_type != FTL_LAYOUT_REGION_TYPE_INVALID) {
23 			return md->mirror_enabled;
24 		}
25 	}
26 
27 	return false;
28 }
29 
30 static struct ftl_md *
31 ftl_md_get_mirror(struct ftl_md *md)
32 {
33 	if (has_mirror(md)) {
34 		return md->dev->layout.md[md->region->mirror_type];
35 	}
36 
37 	return NULL;
38 }
39 
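/* Metadata IO is chunked: a single bdev IO moves at most four transfer units'
 * worth of blocks; xfer_size() below expresses the same limit in bytes. */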
40 uint64_t
41 ftl_md_xfer_blocks(struct spdk_ftl_dev *dev)
42 {
43 	return 4ULL * dev->xfer_size;
44 }
45 
46 static uint64_t
47 xfer_size(struct ftl_md *md)
48 {
49 	return ftl_md_xfer_blocks(md->dev) * FTL_BLOCK_SIZE;
50 }
51 
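/* The metadata buffer can be backed by an SPDK DMA buffer, plain heap memory or a
 * shared-memory object (see ftl_md_create_shm()). When VSS metadata is used, its
 * area follows the data blocks within the same allocation. */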
52 static void
53 ftl_md_create_spdk_buf(struct ftl_md *md, uint64_t vss_blksz)
54 {
55 	md->shm_fd = -1;
56 	md->vss_data = NULL;
57 	md->data = spdk_zmalloc(md->data_blocks * (FTL_BLOCK_SIZE + vss_blksz), FTL_BLOCK_SIZE, NULL,
58 				SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
59 
60 	if (md->data && vss_blksz) {
61 		md->vss_data = ((char *)md->data) + md->data_blocks * FTL_BLOCK_SIZE;
62 	}
63 }
64 
65 static void
66 ftl_md_create_heap(struct ftl_md *md, uint64_t vss_blksz)
67 {
68 	md->shm_fd = -1;
69 	md->vss_data = NULL;
70 	md->data = calloc(md->data_blocks, FTL_BLOCK_SIZE + vss_blksz);
71 
72 	if (md->data && vss_blksz) {
73 		md->vss_data = ((char *)md->data) + md->data_blocks * FTL_BLOCK_SIZE;
74 	}
75 }
76 
77 static void
78 ftl_md_destroy_spdk_buf(struct ftl_md *md)
79 {
80 	if (md->data) {
81 		spdk_free(md->data);
82 		md->data = NULL;
83 		md->vss_data = NULL;
84 	}
85 }
86 
87 static void
88 ftl_md_destroy_heap(struct ftl_md *md)
89 {
90 	if (md->data) {
91 		free(md->data);
92 		md->data = NULL;
93 		md->vss_data = NULL;
94 	}
95 }
96 
97 static int
98 ftl_wrapper_open(const char *name, int of, mode_t m)
99 {
100 	return open(name, of, m);
101 }
102 
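/* Build the SHM object path ("/dev/hugepages/ftl_<uuid>_<name>"); on failure the
 * name is left empty, which later makes ftl_md_create_shm() fail. */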
103 static void
104 ftl_md_setup_obj(struct ftl_md *md, int flags,
105 		 const char *name)
106 {
107 	char uuid_str[SPDK_UUID_STRING_LEN];
108 	const char *fmt;
109 
110 	if (!(flags & FTL_MD_CREATE_SHM)) {
111 		assert(false);
112 		return;
113 	}
114 
115 	/* TODO: temporary, define a proper hugetlbfs mountpoint */
116 	fmt = "/dev/hugepages/ftl_%s_%s";
117 	md->shm_mmap_flags = MAP_SHARED;
118 	md->shm_open = ftl_wrapper_open;
119 	md->shm_unlink = unlink;
120 
121 	if (name == NULL ||
122 	    spdk_uuid_fmt_lower(uuid_str, SPDK_UUID_STRING_LEN, &md->dev->conf.uuid) ||
123 	    snprintf(md->name, sizeof(md->name) / sizeof(md->name[0]),
124 		     fmt, uuid_str, name) <= 0) {
125 		md->name[0] = 0;
126 	}
127 }
128 
129 static void
130 ftl_md_invalidate_shm(struct ftl_md *md)
131 {
132 	if (md->dev->sb_shm && md->dev->sb_shm->shm_ready) {
133 		md->dev->init_retry = true;
134 		md->dev->sb_shm->shm_ready = false;
135 	}
136 }
137 
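/* Back the metadata buffer with a shared-memory (hugepage) object: optionally unlink
 * and recreate it (FTL_MD_CREATE_SHM_NEW), size it to the data blocks plus an optional
 * VSS area (each rounded up to the SHM block size), then mmap() it, mlock() the pages
 * and register the mapping with SPDK. Any failure invalidates the shared state
 * (init_retry/shm_ready) so that initialization can be retried. */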
138 static void
139 ftl_md_create_shm(struct ftl_md *md, uint64_t vss_blksz, int flags)
140 {
141 	struct stat shm_stat;
142 	size_t vss_blk_offs;
143 	void *shm_ptr;
144 	int open_flags = O_RDWR;
145 	mode_t open_mode = S_IRUSR | S_IWUSR;
146 
147 	assert(md->shm_open && md->shm_unlink);
148 	md->data = NULL;
149 	md->vss_data = NULL;
150 	md->shm_sz = 0;
151 
152 	/* Must have an object name */
153 	if (md->name[0] == 0) {
154 		assert(false);
155 		return;
156 	}
157 
158 	/* If specified, unlink before creating a new SHM object */
159 	if (flags & FTL_MD_CREATE_SHM_NEW) {
160 		if (md->shm_unlink(md->name) < 0 && errno != ENOENT) {
161 			ftl_md_invalidate_shm(md);
162 			return;
163 		}
164 		open_flags += O_CREAT | O_TRUNC;
165 	}
166 
167 	/* Open existing or create a new SHM object, then query its props */
168 	md->shm_fd = md->shm_open(md->name, open_flags, open_mode);
169 	if (md->shm_fd < 0 || fstat(md->shm_fd, &shm_stat) < 0) {
170 		goto err_shm;
171 	}
172 
173 	/* Verify open mode hasn't changed */
174 	if ((shm_stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) != open_mode) {
175 		goto err_shm;
176 	}
177 
178 	/* Round up the SHM obj size to the nearest blk size (i.e. page size) */
179 	md->shm_sz = spdk_divide_round_up(md->data_blocks * FTL_BLOCK_SIZE, shm_stat.st_blksize);
180 
181 	/* Add some blks for VSS metadata */
182 	vss_blk_offs = md->shm_sz;
183 
184 	if (vss_blksz) {
185 		md->shm_sz += spdk_divide_round_up(md->data_blocks * vss_blksz,
186 						   shm_stat.st_blksize);
187 	}
188 
189 	/* Total SHM obj size */
190 	md->shm_sz *= shm_stat.st_blksize;
191 
192 	/* Set or check the object size - zero-initialized when created anew (FTL_MD_CREATE_SHM_NEW) */
193 	if ((shm_stat.st_size == 0 && (ftruncate(md->shm_fd, md->shm_sz) < 0 ||
194 				       (flags & FTL_MD_CREATE_SHM_NEW) == 0))
195 	    || (shm_stat.st_size > 0 && (size_t)shm_stat.st_size != md->shm_sz)) {
196 		goto err_shm;
197 	}
198 
199 	/* Create a virtual memory mapping for the object */
200 	shm_ptr = mmap(NULL, md->shm_sz, PROT_READ | PROT_WRITE, md->shm_mmap_flags,
201 		       md->shm_fd, 0);
202 	if (shm_ptr == MAP_FAILED) {
203 		goto err_shm;
204 	}
205 
206 	md->data = shm_ptr;
207 	if (vss_blksz) {
208 		md->vss_data = ((char *)shm_ptr) + vss_blk_offs * shm_stat.st_blksize;
209 	}
210 
211 	/* Lock the pages in memory (i.e. prevent the pages from being paged out) */
212 	if (mlock(md->data, md->shm_sz) < 0) {
213 		goto err_map;
214 	}
215 
216 	if (spdk_mem_register(md->data, md->shm_sz)) {
217 		goto err_mlock;
218 	}
219 	md->mem_reg = true;
220 
221 	return;
222 
223 	/* Cleanup upon fault */
224 err_mlock:
225 	munlock(md->data, md->shm_sz);
226 
227 err_map:
228 	munmap(md->data, md->shm_sz);
229 	md->data = NULL;
230 	md->vss_data = NULL;
231 	md->shm_sz = 0;
232 
233 err_shm:
234 	if (md->shm_fd >= 0) {
235 		close(md->shm_fd);
236 		md->shm_unlink(md->name);
237 		md->shm_fd = -1;
238 	}
239 	ftl_md_invalidate_shm(md);
240 }
241 
242 static void
243 ftl_md_destroy_shm(struct ftl_md *md, int flags)
244 {
245 	if (!md->data) {
246 		return;
247 	}
248 
249 	assert(md->shm_sz > 0);
250 	if (md->mem_reg) {
251 		spdk_mem_unregister(md->data, md->shm_sz);
252 		md->mem_reg = false;
253 	}
254 
255 	/* Unlock the pages in memory */
256 	munlock(md->data, md->shm_sz);
257 
258 	/* Remove the virtual memory mapping for the object */
259 	munmap(md->data, md->shm_sz);
260 
261 	/* Close SHM object fd */
262 	close(md->shm_fd);
263 
264 	md->data = NULL;
265 	md->vss_data = NULL;
266 
267 	/* If specified, keep the object in SHM */
268 	if (flags & FTL_MD_DESTROY_SHM_KEEP) {
269 		return;
270 	}
271 
272 	/* Otherwise destroy/unlink the object */
273 	assert(md->name[0] != 0 && md->shm_unlink != NULL);
274 	md->shm_unlink(md->name);
275 }
276 
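/* Create a metadata object: allocate the backing buffer according to the creation flags
 * (SHM, SPDK DMA buffer or heap, unless FTL_MD_CREATE_NO_MEM is given), allocate the
 * per-entry VSS DMA buffer when the region uses VSS and defines an entry size, and bind
 * the object to its layout region. */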
277 struct ftl_md *ftl_md_create(struct spdk_ftl_dev *dev, uint64_t blocks,
278 			     uint64_t vss_blksz, const char *name, int flags,
279 			     const struct ftl_layout_region *region)
280 {
281 	struct ftl_md *md;
282 
283 	md = calloc(1, sizeof(*md));
284 	if (!md) {
285 		return NULL;
286 	}
287 	md->dev = dev;
288 	md->data_blocks = blocks;
289 	md->mirror_enabled = true;
290 
291 	if (flags != FTL_MD_CREATE_NO_MEM) {
292 		if (flags & FTL_MD_CREATE_SHM) {
293 			ftl_md_setup_obj(md, flags, name);
294 			ftl_md_create_shm(md, vss_blksz, flags);
295 		} else if (flags & FTL_MD_CREATE_SPDK_BUF) {
296 			ftl_md_create_spdk_buf(md, vss_blksz);
297 		} else {
298 			assert((flags & FTL_MD_CREATE_HEAP) == FTL_MD_CREATE_HEAP);
299 			ftl_md_create_heap(md, vss_blksz);
300 		}
301 
302 		if (!md->data) {
303 			free(md);
304 			return NULL;
305 		}
306 	}
307 
308 	if (region) {
309 		size_t entry_vss_buf_size = vss_blksz * region->entry_size;
310 
311 		if (entry_vss_buf_size) {
312 			md->entry_vss_dma_buf = spdk_malloc(entry_vss_buf_size, FTL_BLOCK_SIZE,
313 							    NULL, SPDK_ENV_LCORE_ID_ANY,
314 							    SPDK_MALLOC_DMA);
315 			if (!md->entry_vss_dma_buf) {
316 				goto err;
317 			}
318 		}
319 
320 		ftl_md_set_region(md, region);
321 	}
322 
323 	return md;
324 err:
325 	ftl_md_destroy(md, ftl_md_destroy_region_flags(dev, region->type));
326 	return NULL;
327 }
328 
329 int
330 ftl_md_unlink(struct spdk_ftl_dev *dev, const char *name, int flags)
331 {
332 	struct ftl_md md = { 0 };
333 
334 	if (0 == (flags & FTL_MD_CREATE_SHM)) {
335 		/* Unlink can be called for shared memory only */
336 		return -EINVAL;
337 	}
338 
339 	md.dev = dev;
340 	ftl_md_setup_obj(&md, flags, name);
341 
342 	return md.shm_unlink(md.name);
343 }
344 
345 void
346 ftl_md_destroy(struct ftl_md *md, int flags)
347 {
348 	if (!md) {
349 		return;
350 	}
351 
352 	if (!md->is_mirror) {
353 		ftl_md_free_buf(md, flags);
354 		spdk_free(md->entry_vss_dma_buf);
355 	}
356 	free(md);
357 }
358 
359 void
360 ftl_md_free_buf(struct ftl_md *md, int flags)
361 {
362 	if (!md) {
363 		return;
364 	}
365 
366 	if (md->shm_fd < 0) {
367 		if (flags & FTL_MD_DESTROY_SPDK_BUF) {
368 			ftl_md_destroy_spdk_buf(md);
369 		} else {
370 			assert(flags == 0);
371 			ftl_md_destroy_heap(md);
372 		}
373 	} else {
374 		ftl_md_destroy_shm(md, flags);
375 	}
376 }
377 
378 void *
379 ftl_md_get_buffer(struct ftl_md *md)
380 {
381 	return md->data;
382 }
383 
384 uint64_t
385 ftl_md_get_buffer_size(struct ftl_md *md)
386 {
387 	return md->data_blocks * FTL_BLOCK_SIZE;
388 }
389 
390 static void
391 ftl_md_vss_buf_init(union ftl_md_vss *buf, uint32_t count,
392 		    const union ftl_md_vss *vss_pattern)
393 {
394 	while (count) {
395 		count--;
396 		buf[count] = *vss_pattern;
397 	}
398 }
399 
400 union ftl_md_vss *ftl_md_vss_buf_alloc(struct ftl_layout_region *region, uint32_t count)
401 {
402 	union ftl_md_vss *buf = spdk_zmalloc(count * FTL_MD_VSS_SZ, FTL_BLOCK_SIZE, NULL,
403 						     SPDK_ENV_LCORE_ID_ANY,
404 						     SPDK_MALLOC_DMA);
405 
406 	if (!buf) {
407 		return NULL;
408 	}
409 
410 	union ftl_md_vss vss_buf = {0};
411 	vss_buf.version.md_version = region->current.version;
412 	ftl_md_vss_buf_init(buf, count, &vss_buf);
413 	return buf;
414 }
415 
416 union ftl_md_vss *ftl_md_get_vss_buffer(struct ftl_md *md)
417 {
418 	return md->vss_data;
419 }
420 
421 static void
422 io_cleanup(struct ftl_md *md)
423 {
424 	spdk_dma_free(md->io.data);
425 	md->io.data = NULL;
426 
427 	spdk_dma_free(md->io.md);
428 	md->io.md = NULL;
429 }
430 
431 static void
432 exception(void *arg)
433 {
434 	struct ftl_md *md = arg;
435 
436 	md->cb(md->dev, md, -EINVAL);
437 	io_cleanup(md);
438 }
439 
440 static inline enum ftl_stats_type
441 get_bdev_io_ftl_stats_type(struct spdk_ftl_dev *dev, struct spdk_bdev_io *bdev_io) {
442 	struct spdk_bdev *nvc = spdk_bdev_desc_get_bdev(dev->nv_cache.bdev_desc);
443 
444 	if (bdev_io->bdev == nvc) {
445 		return FTL_STATS_TYPE_MD_NV_CACHE;
446 	} else {
447 		return FTL_STATS_TYPE_MD_BASE;
448 	}
451 }
452 
453 static void
454 read_write_blocks_cb(struct spdk_bdev_io *bdev_io, bool success, void *arg)
455 {
456 	struct ftl_md *md = arg;
457 
458 	ftl_stats_bdev_io_completed(md->dev, get_bdev_io_ftl_stats_type(md->dev, bdev_io), bdev_io);
459 
460 	if (spdk_unlikely(!success)) {
461 		if (md->io.op == FTL_MD_OP_RESTORE && has_mirror(md)) {
462 			md->io.status = -EAGAIN;
463 		} else {
464 			md->io.status = -EIO;
465 		}
466 	} else {
467 		uint64_t blocks = bdev_io->u.bdev.num_blocks;
468 		uint64_t size = blocks * FTL_BLOCK_SIZE;
469 
470 		if (md->io.op == FTL_MD_OP_RESTORE) {
471 			memcpy(md->data + md->io.data_offset, md->io.data, size);
472 
473 			if (md->vss_data) {
474 				uint64_t vss_offset = md->io.data_offset / FTL_BLOCK_SIZE;
475 				vss_offset *= FTL_MD_VSS_SZ;
476 				memcpy(md->vss_data + vss_offset, md->io.md, blocks * FTL_MD_VSS_SZ);
477 			}
478 		}
479 
480 		md->io.address += blocks;
481 		md->io.remaining -= blocks;
482 		md->io.data_offset += size;
483 	}
484 
485 	spdk_bdev_free_io(bdev_io);
486 
487 	io_submit(md);
488 }
489 
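/* IOs targeting the NV cache bdev go through the ftl_nv_cache_bdev_*() helpers; all other
 * bdevs use the regular spdk_bdev_*_blocks() APIs, with or without a metadata buffer. */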
490 static inline int
491 read_blocks(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *desc,
492 	    struct spdk_io_channel *ch,
493 	    void *buf, void *md_buf,
494 	    uint64_t offset_blocks, uint64_t num_blocks,
495 	    spdk_bdev_io_completion_cb cb, void *cb_arg)
496 {
497 	if (desc == dev->nv_cache.bdev_desc) {
498 		return ftl_nv_cache_bdev_read_blocks_with_md(dev, desc, ch, buf, md_buf,
499 				offset_blocks, num_blocks,
500 				cb, cb_arg);
501 	} else if (md_buf) {
502 		return spdk_bdev_read_blocks_with_md(desc, ch, buf, md_buf,
503 						     offset_blocks, num_blocks,
504 						     cb, cb_arg);
505 	} else {
506 		return spdk_bdev_read_blocks(desc, ch, buf,
507 					     offset_blocks, num_blocks,
508 					     cb, cb_arg);
509 	}
510 }
511 
512 static inline int
513 write_blocks(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *desc,
514 	     struct spdk_io_channel *ch,
515 	     void *buf, void *md_buf,
516 	     uint64_t offset_blocks, uint64_t num_blocks,
517 	     spdk_bdev_io_completion_cb cb, void *cb_arg)
518 {
519 	if (desc == dev->nv_cache.bdev_desc) {
520 		return ftl_nv_cache_bdev_write_blocks_with_md(dev, desc, ch, buf, md_buf,
521 				offset_blocks, num_blocks,
522 				cb, cb_arg);
523 	} else if (md_buf) {
524 		return spdk_bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks,
525 						      num_blocks, cb, cb_arg);
526 	} else {
527 		return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
528 	}
529 }
530 
531 static void
532 read_write_blocks(void *_md)
533 {
534 	struct ftl_md *md = _md;
535 	const struct ftl_layout_region *region = md->region;
536 	uint64_t blocks;
537 	int rc = 0;
538 
539 	blocks = spdk_min(md->io.remaining, ftl_md_xfer_blocks(md->dev));
540 
541 	switch (md->io.op) {
542 	case FTL_MD_OP_RESTORE:
543 		rc = read_blocks(md->dev, region->bdev_desc, region->ioch,
544 				 md->io.data, md->io.md,
545 				 md->io.address, blocks,
546 				 read_write_blocks_cb, md);
547 		break;
548 	case FTL_MD_OP_PERSIST:
549 	case FTL_MD_OP_CLEAR:
550 		rc = write_blocks(md->dev, region->bdev_desc, region->ioch,
551 				  md->io.data, md->io.md,
552 				  md->io.address, blocks,
553 				  read_write_blocks_cb, md);
554 		break;
555 	default:
556 		ftl_abort();
557 	}
558 
559 	if (spdk_unlikely(rc)) {
560 		if (rc == -ENOMEM) {
561 			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(region->bdev_desc);
562 			md->io.bdev_io_wait.bdev = bdev;
563 			md->io.bdev_io_wait.cb_fn = read_write_blocks;
564 			md->io.bdev_io_wait.cb_arg = md;
565 			spdk_bdev_queue_io_wait(bdev, region->ioch, &md->io.bdev_io_wait);
566 		} else {
567 			ftl_abort();
568 		}
569 	}
570 }
571 
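/* Chunked metadata IO loop: io_prepare() allocates the staging buffers and initializes the
 * address/remaining counters, io_submit() stages up to ftl_md_xfer_blocks() of data (and
 * VSS) per iteration, read_write_blocks() issues the bdev IO and read_write_blocks_cb()
 * advances the counters and resubmits until everything is transferred or an error is
 * recorded, after which io_done() completes the operation. */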
572 static void
573 io_submit(struct ftl_md *md)
574 {
575 	if (!md->io.remaining || md->io.status) {
576 		io_done(md);
577 		return;
578 	}
579 
580 	if (md->io.op == FTL_MD_OP_PERSIST) {
581 		uint64_t blocks = spdk_min(md->io.remaining, ftl_md_xfer_blocks(md->dev));
582 
583 		memcpy(md->io.data, md->data + md->io.data_offset, FTL_BLOCK_SIZE * blocks);
584 
585 		if (md->vss_data) {
586 			uint64_t vss_offset = md->io.data_offset / FTL_BLOCK_SIZE;
587 			vss_offset *= FTL_MD_VSS_SZ;
588 			assert(md->io.md);
589 			memcpy(md->io.md, md->vss_data + vss_offset, FTL_MD_VSS_SZ * blocks);
590 		}
591 	}
592 
593 	read_write_blocks(md);
594 }
595 
596 static int
597 io_can_start(struct ftl_md *md)
598 {
599 	assert(NULL == md->io.data);
600 	if (NULL != md->io.data) {
601 		/* Outstanding IO on the metadata object */
602 		return -EINVAL;
603 	}
604 
605 	if (!md->region) {
606 		/* No device region to process data */
607 		return -EINVAL;
608 	}
609 
610 	if (md->region->current.blocks > md->data_blocks) {
611 		/* The device region is bigger than the metadata buffer */
612 		FTL_ERRLOG(md->dev, "Block count mismatch between metadata object and "
613 			   "device region\n");
614 		return -EINVAL;
615 	}
616 
617 	return 0;
618 }
619 
620 static int
621 io_prepare(struct ftl_md *md, enum ftl_md_ops op)
622 {
623 	const struct ftl_layout_region *region = md->region;
624 	uint64_t data_size, meta_size = 0;
625 
626 	/* Allocates buffer for IO */
627 	data_size = xfer_size(md);
628 	md->io.data = spdk_zmalloc(data_size, FTL_BLOCK_SIZE, NULL,
629 				   SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
630 	if (!md->io.data) {
631 		return -ENOMEM;
632 	}
633 
634 	if (md->vss_data || md->region->vss_blksz) {
635 		meta_size = ftl_md_xfer_blocks(md->dev) * FTL_MD_VSS_SZ;
636 		md->io.md = spdk_zmalloc(meta_size, FTL_BLOCK_SIZE, NULL,
637 					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
638 		if (!md->io.md) {
639 			spdk_dma_free(md->io.data);
640 			md->io.data = NULL;
641 			return -ENOMEM;
642 		}
643 	}
644 
645 	md->io.address = region->current.offset;
646 	md->io.remaining = region->current.blocks;
647 	md->io.data_offset = 0;
648 	md->io.status = 0;
649 	md->io.op = op;
650 
651 	return 0;
652 }
653 
654 static int
655 io_init(struct ftl_md *md, enum ftl_md_ops op)
656 {
657 	if (io_can_start(md)) {
658 		return -EINVAL;
659 	}
660 
661 	if (io_prepare(md, op)) {
662 		return -ENOMEM;
663 	}
664 
665 	return 0;
666 }
667 
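/* Entry-level IOs address a single region entry: the LBA is the region offset plus the
 * entry index multiplied by the entry size (in blocks). */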
668 static uint64_t
669 persist_entry_lba(struct ftl_md *md, uint64_t start_entry)
670 {
671 	return md->region->current.offset + start_entry * md->region->entry_size;
672 }
673 
674 static void
675 persist_entry_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
676 {
677 	struct ftl_md_io_entry_ctx *ctx = cb_arg;
678 	struct ftl_md *md = ctx->md;
679 
680 	ftl_stats_bdev_io_completed(md->dev, get_bdev_io_ftl_stats_type(md->dev, bdev_io), bdev_io);
681 
682 	spdk_bdev_free_io(bdev_io);
683 
684 	assert(ctx->remaining > 0);
685 	ctx->remaining--;
686 
687 	if (!success) {
688 		ctx->status = -EIO;
689 	}
690 
691 	if (!ctx->remaining) {
692 		ctx->cb(ctx->status, ctx->cb_arg);
693 	}
694 }
695 
696 static int
697 ftl_md_persist_entry_write_blocks(struct ftl_md_io_entry_ctx *ctx, struct ftl_md *md,
698 				  spdk_bdev_io_wait_cb retry_fn)
699 {
700 	int rc;
701 
702 	rc = write_blocks(md->dev, md->region->bdev_desc, md->region->ioch,
703 			  ctx->buffer, ctx->vss_buffer,
704 			  persist_entry_lba(md, ctx->start_entry), md->region->entry_size * ctx->num_entries,
705 			  persist_entry_cb, ctx);
706 	if (spdk_unlikely(rc)) {
707 		if (rc == -ENOMEM) {
708 			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(md->region->bdev_desc);
709 			ctx->bdev_io_wait.bdev = bdev;
710 			ctx->bdev_io_wait.cb_fn = retry_fn;
711 			ctx->bdev_io_wait.cb_arg = ctx;
712 			spdk_bdev_queue_io_wait(bdev, md->region->ioch, &ctx->bdev_io_wait);
713 		} else {
714 			ftl_abort();
715 		}
716 	}
717 
718 	return rc;
719 }
720 
721 static void
722 ftl_md_persist_entry_mirror(void *_ctx)
723 {
724 	struct ftl_md_io_entry_ctx *ctx = _ctx;
725 	struct ftl_md *md_mirror = ftl_md_get_mirror(ctx->md);
726 
727 	ftl_md_persist_entry_write_blocks(ctx, md_mirror, ftl_md_persist_entry_mirror);
728 }
729 
730 static void
731 ftl_md_persist_entry_primary(void *_ctx)
732 {
733 	struct ftl_md_io_entry_ctx *ctx = _ctx;
734 	struct ftl_md *md = ctx->md;
735 	int rc;
736 
737 	rc = ftl_md_persist_entry_write_blocks(ctx, md, ftl_md_persist_entry_primary);
738 
739 	if (!rc && has_mirror(md)) {
740 		assert(md->region->entry_size == (ftl_md_get_mirror(md))->region->entry_size);
741 
742 		/* The MD object has mirror so execute persist on it too */
743 		ftl_md_persist_entry_mirror(ctx);
744 		ctx->remaining++;
745 	}
746 }
747 
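/* ctx->remaining counts the outstanding writes (the primary plus, when mirrored, the
 * mirror copy); persist_entry_cb() invokes the user callback once it reaches zero. */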
748 static void
749 _ftl_md_persist_entry(struct ftl_md_io_entry_ctx *ctx)
750 {
751 	ctx->status = 0;
752 	ctx->remaining = 1;
753 
754 	/* First execute an IO to the primary region */
755 	ftl_md_persist_entry_primary(ctx);
756 }
757 
758 void
759 ftl_md_persist_entries(struct ftl_md *md, uint64_t start_entry, uint64_t num_entries, void *buffer,
760 		       void *vss_buffer, ftl_md_io_entry_cb cb, void *cb_arg,
761 		       struct ftl_md_io_entry_ctx *ctx)
762 {
763 	if (spdk_unlikely(0 == md->region->entry_size)) {
764 		/* This MD has not been configured to support the persist entry call */
765 		ftl_abort();
766 	}
767 	if (spdk_unlikely(start_entry + num_entries > md->region->num_entries)) {
768 		/* Exceeding number of available entries */
769 		ftl_abort();
770 	}
771 
772 	/* Initialize persist entry context */
773 	ctx->cb = cb;
774 	ctx->cb_arg = cb_arg;
775 	ctx->md = md;
776 	ctx->start_entry = start_entry;
777 	ctx->buffer = buffer;
778 	ctx->num_entries = num_entries;
779 	ctx->vss_buffer = vss_buffer ? : md->entry_vss_dma_buf;
780 
781 	_ftl_md_persist_entry(ctx);
782 }
783 
784 static void
785 read_entry_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
786 {
787 	struct ftl_md_io_entry_ctx *ctx = cb_arg;
788 	struct ftl_md *md = ctx->md;
789 
790 	ftl_stats_bdev_io_completed(md->dev, get_bdev_io_ftl_stats_type(md->dev, bdev_io), bdev_io);
791 
792 	spdk_bdev_free_io(bdev_io);
793 
794 	if (!success) {
795 		if (has_mirror(md)) {
796 			md->mirror_enabled = true;
797 
798 			/* First read from the mirror */
799 			ftl_md_read_entry(ftl_md_get_mirror(md), ctx->start_entry, ctx->buffer,
800 					  ctx->vss_buffer,
801 					  ctx->cb, ctx->cb_arg,
802 					  ctx);
803 			return;
804 		} else {
805 			ctx->status = -EIO;
806 			goto finish_io;
807 		}
808 	}
809 
810 finish_io:
811 	ctx->cb(ctx->status, ctx->cb_arg);
812 }
813 
814 static void
815 ftl_md_read_entry_read_blocks(struct ftl_md_io_entry_ctx *ctx, struct ftl_md *md,
816 			      spdk_bdev_io_wait_cb retry_fn)
817 {
818 	int rc;
819 
820 	rc = read_blocks(md->dev, md->region->bdev_desc, md->region->ioch,
821 			 ctx->buffer, ctx->vss_buffer,
822 			 persist_entry_lba(md, ctx->start_entry), md->region->entry_size,
823 			 read_entry_cb, ctx);
824 
825 	if (spdk_unlikely(rc)) {
826 		if (rc == -ENOMEM) {
827 			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(md->region->bdev_desc);
828 			ctx->bdev_io_wait.bdev = bdev;
829 			ctx->bdev_io_wait.cb_fn = retry_fn;
830 			ctx->bdev_io_wait.cb_arg = ctx;
831 			spdk_bdev_queue_io_wait(bdev, md->region->ioch, &ctx->bdev_io_wait);
832 		} else {
833 			ftl_abort();
834 		}
835 	}
836 }
837 
838 static void
839 _ftl_md_read_entry(void *_ctx)
840 {
841 	struct ftl_md_io_entry_ctx *ctx = _ctx;
842 
843 	ftl_md_read_entry_read_blocks(ctx, ctx->md, _ftl_md_read_entry);
844 }
845 
846 void
847 ftl_md_read_entry(struct ftl_md *md, uint64_t start_entry, void *buffer, void *vss_buffer,
848 		  ftl_md_io_entry_cb cb, void *cb_arg,
849 		  struct ftl_md_io_entry_ctx *ctx)
850 {
851 	if (spdk_unlikely(0 == md->region->entry_size)) {
852 		/* This MD has not been configured to support the read entry call */
853 		ftl_abort();
854 	}
855 
856 	ctx->cb = cb;
857 	ctx->cb_arg = cb_arg;
858 	ctx->md = md;
859 	ctx->start_entry = start_entry;
860 	ctx->buffer = buffer;
861 	ctx->vss_buffer = vss_buffer;
862 
863 	_ftl_md_read_entry(ctx);
864 }
865 
866 void
867 ftl_md_persist_entry_retry(struct ftl_md_io_entry_ctx *ctx)
868 {
869 	_ftl_md_persist_entry(ctx);
870 }
871 
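/* Mirrored persist: ftl_md_persist() persists the mirror MD object first; this callback
 * then either propagates a mirror error or continues the persist on the primary. */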
872 static void
873 persist_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
874 {
875 	struct ftl_md *primary = md->owner.private;
876 
877 	if (status) {
878 		/* We got an error, stop the persist procedure immediately */
879 		primary->io.status = status;
880 		io_done(primary);
881 	} else {
882 		/* Now continue the persist procedure on the primary MD object */
883 		if (0 == io_init(primary, FTL_MD_OP_PERSIST)) {
884 			io_submit(primary);
885 		} else {
886 			spdk_thread_send_msg(spdk_get_thread(), exception, primary);
887 		}
888 	}
889 }
890 
891 void
892 ftl_md_persist(struct ftl_md *md)
893 {
894 	if (has_mirror(md)) {
895 		struct ftl_md *md_mirror = ftl_md_get_mirror(md);
896 
897 		md->mirror_enabled = true;
898 
899 		/* Set callback and context in mirror */
900 		md_mirror->cb = persist_mirror_cb;
901 		md_mirror->owner.private = md;
902 
903 		/* First persist the mirror */
904 		ftl_md_persist(md_mirror);
905 		return;
906 	}
907 
908 	if (0 == io_init(md, FTL_MD_OP_PERSIST)) {
909 		io_submit(md);
910 	} else {
911 		spdk_thread_send_msg(spdk_get_thread(), exception, md);
912 	}
913 }
914 
915 static void
916 restore_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
917 {
918 	struct ftl_md *primary = md->owner.private;
919 
920 	if (status) {
921 		/* Cannot restore the object from the mirror either, mark the error and fail */
922 		primary->io.status = -EIO;
923 		io_done(primary);
924 	} else {
925 		/*
926 		 * Restoring from the mirror succeeded. Synchronize the mirror to the primary:
927 		 * because the MD content was read from the mirror, it can be disabled and
928 		 * only the primary requires persisting.
929 		 */
930 		primary->io.status = 0;
931 		primary->mirror_enabled = false;
932 		io_cleanup(primary);
933 		ftl_md_persist(primary);
934 		primary->mirror_enabled = true;
935 	}
936 }
937 
938 static void
939 restore_sync_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
940 {
941 	struct ftl_md *primary = md->owner.private;
942 
943 	if (status) {
944 		/* Cannot sync the object from the primary to the mirror, mark error and fail */
945 		primary->io.status = -EIO;
946 		io_done(primary);
947 	} else {
948 		primary->cb(dev, primary, primary->io.status);
949 		io_cleanup(primary);
950 	}
951 }
952 
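/* Post-restore handling: -EAGAIN from the primary region triggers a full restore from the
 * mirror (restore_mirror_cb() then re-persists the primary), while a successful restore
 * after a dirty shutdown re-synchronizes the mirror with the primary (restore_sync_cb()). */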
953 static int
954 restore_done(struct ftl_md *md)
955 {
956 	if (-EAGAIN == md->io.status) {
957 		/* Failed to read the MD from the primary region, try the mirror.
958 		 * At the moment the mirror is read in its entirety; (TODO) in the
959 		 * future we could restore from the primary and mirror regions
960 		 * with finer granularity.
961 		 */
962 
963 		if (has_mirror(md)) {
964 			struct ftl_md *md_mirror = ftl_md_get_mirror(md);
965 
966 			md->mirror_enabled = true;
967 
968 			/* Set callback and context in mirror */
969 			md_mirror->cb = restore_mirror_cb;
970 			md_mirror->owner.private = md;
971 
972 			/* First restore the mirror */
973 			ftl_md_restore(md_mirror);
974 			return -EAGAIN;
975 		} else {
976 			return -EIO;
977 		}
978 	} else if (0 == md->io.status && false == md->dev->sb->clean) {
979 		if (has_mirror(md)) {
980 			struct ftl_md *md_mirror = ftl_md_get_mirror(md);
981 			/* There was a dirty shutdown, synchronize primary to mirror */
982 
983 			/* Set callback and context in the mirror */
984 			md_mirror->cb = restore_sync_cb;
985 			md_mirror->owner.private = md;
986 
987 			/* First persist the mirror */
988 			ftl_md_persist(md_mirror);
989 			return -EAGAIN;
990 		}
991 	}
992 
993 	return md->io.status;
994 }
995 
996 static void
997 io_done(struct ftl_md *md)
998 {
999 	int status;
1000 
1001 	if (md->io.op == FTL_MD_OP_RESTORE) {
1002 		status = restore_done(md);
1003 	} else {
1004 		status = md->io.status;
1005 	}
1006 
1007 	if (status != -EAGAIN) {
1008 		/* The MD instance may be destroyed in the context of md->cb(), e.g. upon region upgrade. */
1009 		/* The DMA buffers need to be cleaned up first. */
1010 		io_cleanup(md);
1011 		md->cb(md->dev, md, status);
1012 	}
1013 }
1014 
1015 void
1016 ftl_md_restore(struct ftl_md *md)
1017 {
1018 	if (0 == io_init(md, FTL_MD_OP_RESTORE)) {
1019 		io_submit(md);
1020 	} else {
1021 		spdk_thread_send_msg(spdk_get_thread(), exception, md);
1022 	}
1023 }
1024 
1025 static int
1026 pattern_prepare(struct ftl_md *md,
1027 		int data_pattern, union ftl_md_vss *vss_pattern)
1028 {
1029 	void *data = md->io.data;
1030 	uint64_t data_size = xfer_size(md);
1031 
1032 	memset(data, data_pattern, data_size);
1033 
1034 	if (md->io.md) {
1035 		if (vss_pattern) {
1036 			/* store the VSS pattern... */
1037 			ftl_md_vss_buf_init(md->io.md, ftl_md_xfer_blocks(md->dev), vss_pattern);
1038 		} else {
1039 			/* ...or default init VSS to 0 */
1040 			union ftl_md_vss vss = {0};
1041 
1042 			vss.version.md_version = md->region->current.version;
1043 			ftl_md_vss_buf_init(md->io.md, ftl_md_xfer_blocks(md->dev), &vss);
1044 		}
1045 	}
1046 
1047 	return 0;
1048 }
1049 
1050 static void
1051 clear_mirror_cb(struct spdk_ftl_dev *dev, struct ftl_md *secondary, int status)
1052 {
1053 	struct ftl_md *primary = secondary->owner.private;
1054 
1055 	if (status) {
1056 		/* We got an error, stop the persist procedure immediately */
1057 		primary->io.status = status;
1058 		io_done(primary);
1059 	} else {
1060 		/* Now continue the persist procedure on the primary MD object */
1061 		io_submit(primary);
1062 	}
1063 }
1064 
1065 void
1066 ftl_md_clear(struct ftl_md *md, int data_pattern, union ftl_md_vss *vss_pattern)
1067 {
1068 	if (has_mirror(md)) {
1069 		struct ftl_md *md_mirror = ftl_md_get_mirror(md);
1070 
1071 		md->mirror_enabled = true;
1072 
1073 		/* Set callback and context in mirror */
1074 		md_mirror->cb = clear_mirror_cb;
1075 		md_mirror->owner.private = md;
1076 
1077 		/* The pattern buffers will not be available outside of this function's context, */
1078 		/* so configure the IO for the primary region now */
1079 		if (0 == io_init(md, FTL_MD_OP_CLEAR) && 0 == pattern_prepare(md, data_pattern, vss_pattern)) {
1080 			/* First persist the mirror */
1081 			ftl_md_clear(md_mirror, data_pattern, vss_pattern);
1082 		} else {
1083 			spdk_thread_send_msg(spdk_get_thread(), exception, md);
1084 		}
1085 		return;
1086 	}
1087 
1088 	if (0 == io_init(md, FTL_MD_OP_CLEAR) && 0 == pattern_prepare(md, data_pattern, vss_pattern)) {
1089 		io_submit(md);
1090 	} else {
1091 		spdk_thread_send_msg(spdk_get_thread(), exception, md);
1092 	}
1093 }
1094 
1095 const struct ftl_layout_region *
1096 ftl_md_get_region(struct ftl_md *md)
1097 {
1098 	return md->region;
1099 }
1100 
1101 void
1102 ftl_md_set_region(struct ftl_md *md,
1103 		  const struct ftl_layout_region *region)
1104 {
1105 	assert(region->current.blocks <= md->data_blocks);
1106 	md->region = region;
1107 
1108 	if (md->vss_data) {
1109 		union ftl_md_vss vss = {0};
1110 		vss.version.md_version = region->current.version;
1111 		ftl_md_vss_buf_init(md->vss_data, md->data_blocks, &vss);
1112 		if (region->entry_size) {
1113 			assert(md->entry_vss_dma_buf);
1114 			ftl_md_vss_buf_init(md->entry_vss_dma_buf, region->entry_size, &vss);
1115 		}
1116 	}
1117 
1118 	if (has_mirror(md)) {
1119 		md->mirror_enabled = true;
1120 	}
1121 }
1122 
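/* Per-region buffer placement: the superblock, band/NVC metadata, valid map and trim state
 * live in SHM (recreated unless fast startup/recovery applies), P2L checkpoints use SPDK
 * DMA buffers, and all remaining regions default to heap allocations. */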
1123 int
1124 ftl_md_create_region_flags(struct spdk_ftl_dev *dev, int region_type)
1125 {
1126 	int flags = FTL_MD_CREATE_SHM;
1127 
1128 	switch (region_type) {
1129 	case FTL_LAYOUT_REGION_TYPE_SB:
1130 		if (dev->conf.mode & SPDK_FTL_MODE_CREATE) {
1131 			flags |= FTL_MD_CREATE_SHM_NEW;
1132 		}
1133 		break;
1134 
1135 	case FTL_LAYOUT_REGION_TYPE_BAND_MD:
1136 	case FTL_LAYOUT_REGION_TYPE_NVC_MD:
1137 		if (!ftl_fast_startup(dev)) {
1138 			flags |= FTL_MD_CREATE_SHM_NEW;
1139 		}
1140 		break;
1141 	case FTL_LAYOUT_REGION_TYPE_VALID_MAP:
1142 	case FTL_LAYOUT_REGION_TYPE_TRIM_MD:
1143 	case FTL_LAYOUT_REGION_TYPE_TRIM_LOG:
1144 		if (!ftl_fast_startup(dev) && !ftl_fast_recovery(dev)) {
1145 			flags |= FTL_MD_CREATE_SHM_NEW;
1146 		}
1147 		break;
1148 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_GC:
1149 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_GC_NEXT:
1150 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_COMP:
1151 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_COMP_NEXT:
1152 		return FTL_MD_CREATE_SPDK_BUF;
1153 	default:
1154 		return FTL_MD_CREATE_HEAP;
1155 	}
1156 
1157 	return flags;
1158 }
1159 
1160 int
1161 ftl_md_destroy_region_flags(struct spdk_ftl_dev *dev, int region_type)
1162 {
1163 	switch (region_type) {
1164 	case FTL_LAYOUT_REGION_TYPE_SB:
1165 	case FTL_LAYOUT_REGION_TYPE_BAND_MD:
1166 	case FTL_LAYOUT_REGION_TYPE_VALID_MAP:
1167 	case FTL_LAYOUT_REGION_TYPE_NVC_MD:
1168 	case FTL_LAYOUT_REGION_TYPE_TRIM_MD:
1169 	case FTL_LAYOUT_REGION_TYPE_TRIM_LOG:
1170 		if (dev->conf.fast_shutdown) {
1171 			return FTL_MD_DESTROY_SHM_KEEP;
1172 		}
1173 		break;
1174 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_GC:
1175 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_GC_NEXT:
1176 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_COMP:
1177 	case FTL_LAYOUT_REGION_TYPE_P2L_CKPT_COMP_NEXT:
1178 		return FTL_MD_DESTROY_SPDK_BUF;
1179 	default:
1180 		break;
1181 	}
1182 	return 0;
1183 }
1184 
1185 int
1186 ftl_md_create_shm_flags(struct spdk_ftl_dev *dev)
1187 {
1188 	int flags = FTL_MD_CREATE_SHM;
1189 
1190 	if (!ftl_fast_startup(dev) && !ftl_fast_recovery(dev)) {
1191 		flags |= FTL_MD_CREATE_SHM_NEW;
1192 	}
1193 	return flags;
1194 }
1195 
1196 int
1197 ftl_md_destroy_shm_flags(struct spdk_ftl_dev *dev)
1198 {
1199 	return (dev->conf.fast_shutdown) ? FTL_MD_DESTROY_SHM_KEEP : 0;
1200 }
1201