/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/ftl.h"
#include "spdk/bdev_zone.h"
#include "spdk/bdev_module.h"
#include "spdk/config.h"

#include "ftl_core.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_band.h"
#include "ftl_debug.h"

#ifdef SPDK_CONFIG_PMDK
#include "libpmem.h"
#endif /* SPDK_CONFIG_PMDK */

#define FTL_CORE_RING_SIZE	4096
#define FTL_INIT_TIMEOUT	30
#define FTL_NSID		1
#define FTL_ZONE_INFO_COUNT	64

/* Dummy bdev module used to claim bdevs. */
static struct spdk_bdev_module g_ftl_bdev_module = {
	.name	= "ftl_lib",
};

struct ftl_dev_init_ctx {
	/* Owner */
	struct spdk_ftl_dev		*dev;
	/* Initial arguments */
	struct spdk_ftl_dev_init_opts	opts;
	/* IO channel for retrieving zone info */
	struct spdk_io_channel		*ioch;
	/* Buffer for reading zone info */
	struct spdk_bdev_zone_info	info[FTL_ZONE_INFO_COUNT];
	/* Currently read zone */
	size_t				zone_id;
	/* User's callback */
	spdk_ftl_init_fn		cb_fn;
	/* Callback's argument */
	void				*cb_arg;
	/* Thread to call the callback on */
	struct spdk_thread		*thread;
	/* Poller to check if the device has been destroyed/initialized */
	struct spdk_poller		*poller;
	/* Status to return for halt completion callback */
	int				halt_complete_status;
};

static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct spdk_ftl_conf	g_default_conf = {
	.limits = {
		/* 5 free bands / 0% host writes */
		[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
		/* 10 free bands / 5% host writes */
		[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
		/* 20 free bands / 40% host writes */
		[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
		/* 40 free bands / 100% host writes - defrag starts running */
		[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
	},
	/* 10 percent valid blocks */
	.invalid_thld = 10,
	/* 20% spare blocks */
	.lba_rsvd = 20,
	/* 6M write buffer per IO channel */
	.write_buffer_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/*
	 * If clear, FTL will return an error when restoring after a dirty shutdown.
	 * If set, the last band will be padded and FTL will restore based only on
	 * closed bands - this will result in lost data after recovery.
	 */
	.allow_open_bands = false,
	.max_io_channels = 128,
	.nv_cache = {
		/* Maximum number of concurrent requests */
		.max_request_cnt = 2048,
		/* Maximum number of blocks per request */
		.max_request_size = 16,
	}
};

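/*
 * Initialize a band's runtime metadata: the valid-block bitmap (one bit per
 * block in the band) and the spinlock protecting the LBA map.
 */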
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;
	int rc;

	lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	rc = pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	if (rc) {
		spdk_bit_array_free(&lba_map->vld);
		return rc;
	}
	ftl_band_md_clear(band);
	return 0;
}

static int
ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
{
	size_t i;

	if (conf->invalid_thld >= 100) {
		return -1;
	}
	if (conf->lba_rsvd >= 100) {
		return -1;
	}
	if (conf->lba_rsvd == 0) {
		return -1;
	}
	if (conf->write_buffer_size == 0) {
		return -1;
	}
	if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
		return -1;
	}

	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
		if (conf->limits[i].limit > 100) {
			return -1;
		}
	}

	return 0;
}

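/*
 * Allocate the band array and per-band structures. All bands start out in the
 * CLOSED state on the shut_bands list, inserted after the previous band so the
 * list stays in ascending id order.
 */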
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *pband;
	unsigned int i;
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->zones);
		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
		if (!band->zone_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			break;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			break;
		}

		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
		if (!band->reloc_bitmap) {
			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
			rc = -1;
			break;
		}
	}

	return rc;
}

static void
ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_ftl_dev *dev = event_ctx;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		assert(0);
		break;
	case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
		ftl_get_media_events(dev);
		break;
	default:
		break;
	}
}

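/*
 * Open and claim the optional write buffer cache bdev. The cache bdev must
 * use FTL_BLOCK_SIZE blocks with a separate metadata buffer of at least
 * sizeof(uint64_t) per block, have DIF disabled, and be large enough to hold
 * two full bands of data plus one header block.
 */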
static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	struct spdk_bdev *bdev;
	struct spdk_ftl_conf *conf = &dev->conf;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	char pool_name[128];
	int rc;

	if (!bdev_name) {
		return 0;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &nv_cache->bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(nv_cache->bdev_desc);
		nv_cache->bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	SPDK_INFOLOG(ftl_init, "Using %s as write buffer cache\n",
		     spdk_bdev_get_name(bdev));

	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", spdk_bdev_get_block_size(bdev));
		return -1;
	}

	if (!spdk_bdev_is_md_separate(bdev)) {
		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
		return -1;
	}

	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	/* The cache needs to be capable of storing at least two full bands. This requirement comes
	 * from the fact that cache works as a protection against power loss, so before the data
	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
	 * extra block is needed to store the header.
	 */
	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
		return -1;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		return -1;
	}

	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
						spdk_bdev_get_md_size(bdev) *
						conf->nv_cache.max_request_size,
						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
						SPDK_ENV_SOCKET_ID_ANY);
	if (!nv_cache->md_pool) {
		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
		return -1;
	}

	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
	if (!nv_cache->dma_buf) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -1;
	}

	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
		SPDK_ERRLOG("Failed to initialize cache lock\n");
		return -1;
	}

	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
	nv_cache->num_available = nv_cache->num_data_blocks;
	nv_cache->ready = false;

	return 0;
}

void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}

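/*
 * Mempool object constructor: preallocates each request's segment bit array.
 * The bit array holds one bit per LBA map block, where each block covers
 * FTL_NUM_LBA_IN_BLOCK entries.
 */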
static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
}

static int
ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
{
	char pool_name[128];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		SPDK_ERRLOG("Failed to create media pool name\n");
		return -1;
	}

	dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
				 sizeof(struct ftl_media_event),
				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				 SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->media_events_pool) {
		SPDK_ERRLOG("Failed to create media events pool\n");
		return -1;
	}

	return 0;
}

static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for the band close / open sequence
	 * alone, plus an additional 8 buffers for handling write errors.
	 * TODO: This memory pool is utilized only by the core thread - it introduces
	 * unnecessary overhead and should be replaced by a different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
				sizeof(struct ftl_lba_map_request),
				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				SPDK_ENV_SOCKET_ID_ANY,
				ftl_lba_map_request_ctor,
				dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}

static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
	LIST_INIT(&dev->band_flush_list);
}

static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	size_t seq = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->seq > seq) {
			seq = band->seq;
		}
	}

	return seq;
}

static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/* Clear the limit applications as they're incremented incorrectly by
	 * the initialization code.
	 */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}

static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	int cnt = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->num_zones && !band->lba_map.num_vld) {
			cnt++;
		}
	}
	return cnt;
}

static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose a read-only device
	 * if there are no free bands? If we abort initialization, should that
	 * depend on having no free bands at all, or on some minimal number of
	 * free bands?
	 */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}

static void
_ftl_dev_init_core_thread(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	dev->core_poller = SPDK_POLLER_REGISTER(ftl_task_core, dev, 0);
	if (!dev->core_poller) {
		SPDK_ERRLOG("Unable to register core poller\n");
		assert(0);
	}

	dev->ioch = spdk_get_io_channel(dev);
}

static int
ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread) {
		return -1;
	}

	dev->core_thread = opts->core_thread;

	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
	return 0;
}

static int
ftl_dev_l2p_alloc_pmem(struct spdk_ftl_dev *dev, size_t l2p_size, const char *l2p_path)
{
#ifdef SPDK_CONFIG_PMDK
	int is_pmem;

	if ((dev->l2p = pmem_map_file(l2p_path, 0,
				      0, 0, &dev->l2p_pmem_len, &is_pmem)) == NULL) {
		SPDK_ERRLOG("Failed to mmap l2p_path\n");
		return -1;
	}

	if (!is_pmem) {
		SPDK_NOTICELOG("l2p_path mapped on non-pmem device\n");
	}

	if (dev->l2p_pmem_len < l2p_size) {
		SPDK_ERRLOG("l2p_path file is too small\n");
		return -1;
	}

	pmem_memset_persist(dev->l2p, FTL_ADDR_INVALID, l2p_size);

	return 0;
#else /* SPDK_CONFIG_PMDK */
	SPDK_ERRLOG("Libpmem not available, cannot use pmem l2p_path\n");
	return -1;
#endif /* SPDK_CONFIG_PMDK */
}

static int
ftl_dev_l2p_alloc_dram(struct spdk_ftl_dev *dev, size_t l2p_size)
{
	dev->l2p = malloc(l2p_size);
	if (!dev->l2p) {
		SPDK_ERRLOG("Failed to allocate l2p table\n");
		return -1;
	}

	memset(dev->l2p, FTL_ADDR_INVALID, l2p_size);

	return 0;
}

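/*
 * Choose the L2P entry width from the address length: 8 bytes when physical
 * addresses need 32 bits or more, 4 bytes otherwise. For example, a device
 * exposing 100M LBAs with 4-byte entries needs a 400MB table, placed either
 * in DRAM or in a pmem-backed file.
 */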
static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size = dev->addr_len >= 32 ? 8 : 4;
	size_t l2p_size = dev->num_lbas * addr_size;
	const char *l2p_path = dev->conf.l2p_path;

	if (dev->num_lbas == 0) {
		SPDK_ERRLOG("Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_ERRLOG("L2p table already allocated\n");
		return -1;
	}

	dev->l2p_pmem_len = 0;
	if (l2p_path) {
		return ftl_dev_l2p_alloc_pmem(dev, l2p_size, l2p_path);
	} else {
		return ftl_dev_l2p_alloc_dram(dev, l2p_size);
	}
}

static void
ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
{
	if (!init_ctx) {
		return;
	}

	if (init_ctx->ioch) {
		spdk_put_io_channel(init_ctx->ioch);
	}

	free(init_ctx);
}

static void
ftl_call_init_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static void
ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
}

static void
ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static int ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
			struct spdk_thread *thread);

static void
ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
{
	if (ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}

static void
ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
		ftl_init_fail(init_ctx);
		return;
	}

	dev->nv_cache.ready = true;
	ftl_init_complete(init_ctx);
}

static void
ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
		ftl_init_fail(init_ctx);
		return;
	}

	nv_cache->phase = 1;
	if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		ftl_init_fail(init_ctx);
	}
}

static void
_ftl_nv_cache_scrub(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	int rc;

	rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
			    spdk_strerror(-rc));
		ftl_init_fail(init_ctx);
	}
}

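/*
 * Fresh-device (SPDK_FTL_MODE_CREATE) setup: generate a new UUID, size the
 * logical address space and allocate the L2P. With the default lba_rsvd of
 * 20, only 80% of the usable blocks are exposed as LBAs; the remainder is
 * spare capacity.
 */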
static int
ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
	}

	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
	} else {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
	}

	return 0;
}

static void
ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (spdk_unlikely(status != 0)) {
		SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
		ftl_init_fail(init_ctx);
		return;
	}

	ftl_init_complete(init_ctx);
}

static void
ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
		return;
	}

	ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
}

static void
ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(init_ctx->dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(init_ctx);
}

static int
ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	dev->uuid = init_ctx->opts.uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}

static void
ftl_dev_update_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;
	size_t i;

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->tail_md_addr = ftl_band_tail_md_addr(band);
	}

	/* Remove the band from the shut_bands list to prevent further processing
	 * if all blocks on this band are bad.
	 */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_zones) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}

static void
ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	ftl_dev_update_bands(dev);

	if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
		SPDK_ERRLOG("Unable to initialize device thread\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(init_ctx)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			ftl_init_fail(init_ctx);
			return;
		}
	} else {
		if (ftl_restore_state(init_ctx)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			ftl_init_fail(init_ctx);
			return;
		}
	}
}

static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);

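/*
 * Zone info is fetched in batches of FTL_ZONE_INFO_COUNT. Each reported zone
 * is mapped to its band and parallel unit by offset, and non-offline zones
 * are linked into the band's zone list. Once all zones have been processed,
 * initialization continues in ftl_dev_init_state().
 */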
static void
ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_band *band;
	struct ftl_zone *zone;
	struct ftl_addr addr;
	size_t i, zones_left, num_zones;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
		return;
	}

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	for (i = 0; i < num_zones; ++i) {
		addr.offset = init_ctx->info[i].zone_id;
		band = &dev->bands[ftl_addr_get_band(dev, addr)];
		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
		zone->info = init_ctx->info[i];

		/* TODO: add support for zone capacity less than zone size */
		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
				    "zone id: %"PRIu64"\n", init_ctx->zone_id);
		}

		/* Set the write pointer to the last block plus one for zones in the full state */
		if (zone->info.state == SPDK_BDEV_ZONE_STATE_FULL) {
			zone->info.write_pointer = zone->info.zone_id + zone->info.capacity;
		}

		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
			band->num_zones++;
			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
		}
	}

	init_ctx->zone_id += num_zones * ftl_get_num_blocks_in_zone(dev);

	ftl_dev_get_zone_info(init_ctx);
}

static void
ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	size_t zones_left, num_zones;
	int rc;

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	if (zones_left == 0) {
		ftl_dev_init_state(init_ctx);
		return;
	}

	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
				     init_ctx->zone_id, num_zones, init_ctx->info,
				     ftl_dev_get_zone_info_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
	}
}

static int
ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	init_ctx->zone_id = 0;
	init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!init_ctx->ioch) {
		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
		return -1;
	}

	ftl_dev_get_zone_info(init_ctx);

	return 0;
}

struct _ftl_io_channel {
	struct ftl_io_channel *ioch;
};

struct ftl_io_channel *
ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
{
	struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);

	return _ioch->ioch;
}

static void
ftl_io_channel_register(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t ioch_index;

	for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
		if (dev->ioch_array[ioch_index] == NULL) {
			dev->ioch_array[ioch_index] = ioch;
			ioch->index = ioch_index;
			break;
		}
	}

	assert(ioch_index < dev->conf.max_io_channels);
	TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
}

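/*
 * Set up the per-channel write buffer: one entry per FTL_BLOCK_SIZE block of
 * write_buffer_size, all backed by a single DMA-able payload allocation, plus
 * single-producer/single-consumer free and submit rings. The rings are sized
 * to the next power of two above num_entries (the +1 likely accounts for the
 * ring's internal full/empty distinction), and every entry starts out on the
 * free queue.
 */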
static int
ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
{
	struct spdk_ftl_dev *dev = ioch->dev;
	struct ftl_wbuf_entry *entry;
	uint32_t i;
	int rc;

	ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
	ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
	if (ioch->wbuf_entries == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
		return -1;
	}

	ioch->qdepth_limit = ioch->num_entries;
	ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
					  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (ioch->wbuf_payload == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer payload\n");
		goto error_entries;
	}

	ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					    spdk_align32pow2(ioch->num_entries + 1),
					    SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->free_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate free queue\n");
		goto error_payload;
	}

	ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					      spdk_align32pow2(ioch->num_entries + 1),
					      SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->submit_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate submit queue\n");
		goto error_free_queue;
	}

	for (i = 0; i < ioch->num_entries; ++i) {
		entry = &ioch->wbuf_entries[i];
		entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
		entry->ioch = ioch;
		entry->index = i;
		entry->addr.offset = FTL_ADDR_INVALID;

		rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to initialize spinlock\n");
			goto error_spinlock;
		}

		spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
	}

	return 0;
error_spinlock:
	for (; i > 0; --i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
	}

	spdk_ring_free(ioch->submit_queue);
error_free_queue:
	spdk_ring_free(ioch->free_queue);
error_payload:
	spdk_free(ioch->wbuf_payload);
error_entries:
	free(ioch->wbuf_entries);

	return -1;
}

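/*
 * IO channel constructor. Runs on the thread that requested the channel:
 * reserves a slot (bounded by max_io_channels), creates the per-channel IO
 * pool, gets channels for the base and cache bdevs, starts the channel
 * poller, and finally registers the channel with the core thread.
 */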
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch;
	uint32_t num_io_channels;
	char mempool_name[32];
	int rc;

	num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	if (num_io_channels >= dev->conf.max_io_channels) {
		SPDK_ERRLOG("Reached maximum number of IO channels\n");
		__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
		return -1;
	}

	ioch = calloc(1, sizeof(*ioch));
	if (ioch == NULL) {
		SPDK_ERRLOG("Failed to allocate IO channel\n");
		goto fail_count;
	}

	rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
		SPDK_ERRLOG("Failed to create IO channel pool name\n");
		free(ioch);
		goto fail_count;
	}

	ioch->cache_ioch = NULL;
	ioch->index = FTL_IO_CHANNEL_INDEX_INVALID;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		free(ioch);
		goto fail_count;
	}

	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!ioch->base_ioch) {
		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
		goto fail_ioch;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			goto fail_cache;
		}
	}

	TAILQ_INIT(&ioch->write_cmpl_queue);
	TAILQ_INIT(&ioch->retry_queue);
	ioch->poller = SPDK_POLLER_REGISTER(ftl_io_channel_poll, ioch, 0);
	if (!ioch->poller) {
		SPDK_ERRLOG("Failed to register IO channel poller\n");
		goto fail_poller;
	}

	if (ftl_io_channel_init_wbuf(ioch)) {
		SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
		goto fail_wbuf;
	}

	_ioch->ioch = ioch;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);

	return 0;
fail_wbuf:
	spdk_poller_unregister(&ioch->poller);
fail_poller:
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}
fail_cache:
	spdk_put_io_channel(ioch->base_ioch);
fail_ioch:
	spdk_mempool_free(ioch->io_pool);
	free(ioch);
fail_count:
	/* Undo the channel count reservation taken at the top of this function */
	__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);

	return -1;
}

static void
ftl_io_channel_unregister(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t i, num_io_channels __attribute__((unused));

	assert(ioch->index < dev->conf.max_io_channels);
	assert(dev->ioch_array[ioch->index] == ioch);

	dev->ioch_array[ioch->index] = NULL;
	TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);

	num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	assert(num_io_channels > 0);

	for (i = 0; i < ioch->num_entries; ++i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
	}

	spdk_mempool_free(ioch->io_pool);
	spdk_ring_free(ioch->free_queue);
	spdk_ring_free(ioch->submit_queue);
	spdk_free(ioch->wbuf_payload);
	free(ioch->wbuf_entries);
	free(ioch);
}

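/*
 * Channel teardown happens in two stages: this callback keeps rescheduling
 * itself until every write buffer entry has returned to the free queue, then
 * evicts any cached entries, releases the underlying bdev channels, and hands
 * the final unregistration off to the core thread.
 */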
static void
_ftl_io_channel_destroy_cb(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t i;

	/* Do not destroy the channel if some of its entries are still in use */
	if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
		spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
		return;
	}

	/* Evict all valid entries from cache */
	for (i = 0; i < ioch->num_entries; ++i) {
		ftl_evict_cache_entry(dev, &ioch->wbuf_entries[i]);
	}

	spdk_poller_unregister(&ioch->poller);

	spdk_put_io_channel(ioch->base_ioch);
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}

	ioch->base_ioch = NULL;
	ioch->cache_ioch = NULL;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
}

static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch = _ioch->ioch;

	/* Mark the IO channel as being flushed to force out any unwritten entries */
	ioch->flush = true;

	_ftl_io_channel_destroy_cb(ioch);
}

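/*
 * Set up device-wide IO channel state: the channel array, the shared metadata
 * and iovec buffers, and the FTL_BATCH_COUNT batch descriptors that carve up
 * those buffers in xfer_size-block slices.
 */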
static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	struct ftl_batch *batch;
	uint32_t i;

	/* Round the number of IO channels up to the nearest power of 2 to allow for easy addr bit shifts */
	dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
	dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);

	dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
	if (!dev->ioch_array) {
		SPDK_ERRLOG("Failed to allocate IO channel array\n");
		return -1;
	}

	if (dev->md_size > 0) {
		dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
					   dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
					   SPDK_MALLOC_DMA);
		if (dev->md_buf == NULL) {
			SPDK_ERRLOG("Failed to allocate metadata buffer\n");
			return -1;
		}
	}

	dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
	if (!dev->iov_buf) {
		SPDK_ERRLOG("Failed to allocate iovec buffer\n");
		return -1;
	}

	TAILQ_INIT(&dev->free_batches);
	TAILQ_INIT(&dev->pending_batches);
	TAILQ_INIT(&dev->ioch_queue);

	for (i = 0; i < FTL_BATCH_COUNT; ++i) {
		batch = &dev->batch_array[i];
		batch->iov = &dev->iov_buf[i * dev->xfer_size];
		batch->num_entries = 0;
		batch->index = i;
		TAILQ_INIT(&batch->entries);
		if (dev->md_buf != NULL) {
			batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
		}

		TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
	}

	dev->num_io_channels = 0;

	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct _ftl_io_channel),
				NULL);

	return 0;
}

static int
ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	uint32_t block_size;
	uint64_t num_blocks;
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (!spdk_bdev_is_zoned(bdev)) {
		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &dev->base_bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(dev->base_bdev_desc);
		dev->base_bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
	dev->md_size = spdk_bdev_get_md_size(bdev);

	block_size = spdk_bdev_get_block_size(bdev);
	if (block_size != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
		return -1;
	}

	num_blocks = spdk_bdev_get_num_blocks(bdev);
	if (num_blocks % ftl_get_num_punits(dev)) {
		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be a multiple "
			    "of the optimal number of zones.\n");
		return -1;
	}

	if (ftl_is_append_supported(dev) &&
	    !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
	dev->addr_len = spdk_u64log2(num_blocks) + 1;

	return 0;
}

static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}

static void
ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
{
	if (!bdev_desc) {
		return;
	}

	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
	spdk_bdev_close(bdev_desc);
}

static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	assert(LIST_EMPTY(&dev->wptr_list));
	assert(dev->current_batch == NULL);

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->bands) {
		for (i = 0; i < ftl_get_num_bands(dev); ++i) {
			free(dev->bands[i].zone_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
			spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
		}
	}

	spdk_dma_free(dev->nv_cache.dma_buf);

	spdk_mempool_free(dev->lba_pool);
	spdk_mempool_free(dev->nv_cache.md_pool);
	spdk_mempool_free(dev->media_events_pool);
	if (dev->lba_request_pool) {
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_reloc_free(dev->reloc);

	ftl_release_bdev(dev->nv_cache.bdev_desc);
	ftl_release_bdev(dev->base_bdev_desc);

	spdk_free(dev->md_buf);

	assert(dev->num_io_channels == 0);
	free(dev->ioch_array);
	free(dev->iov_buf);
	free(dev->name);
	free(dev->bands);
	if (dev->l2p_pmem_len != 0) {
#ifdef SPDK_CONFIG_PMDK
		pmem_unmap(dev->l2p, dev->l2p_pmem_len);
#endif /* SPDK_CONFIG_PMDK */
	} else {
		free(dev->l2p);
	}
	free((char *)dev->conf.l2p_path);
	free(dev);
}

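/*
 * Device initialization entry point. The synchronous part validates the
 * configuration, opens and claims the bdevs, and allocates pools and bands;
 * zone info retrieval and state restore then continue asynchronously, with
 * cb_fn invoked on the calling thread once the device is ready.
 *
 * Minimal usage sketch (bdev name and callback are hypothetical, error
 * handling omitted):
 *
 *	struct spdk_ftl_conf conf;
 *	struct spdk_ftl_dev_init_opts opts = {};
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	opts.conf = &conf;
 *	opts.name = "ftl0";
 *	opts.base_bdev = "nvme0n1";
 *	opts.cache_bdev = NULL;
 *	opts.core_thread = spdk_get_thread();
 *	opts.mode = SPDK_FTL_MODE_CREATE;
 *	spdk_ftl_dev_init(&opts, init_done_cb, init_done_ctx);
 */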
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;
	struct ftl_dev_init_ctx *init_ctx = NULL;
	int rc = -ENOMEM;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	init_ctx = calloc(1, sizeof(*init_ctx));
	if (!init_ctx) {
		goto fail_sync;
	}

	init_ctx->dev = dev;
	init_ctx->opts = *_opts;
	init_ctx->cb_fn = cb_fn;
	init_ctx->cb_arg = cb_arg;
	init_ctx->thread = spdk_get_thread();

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	if (!opts.base_bdev) {
		SPDK_ERRLOG("No underlying device specified in the configuration\n");
		rc = -EINVAL;
		goto fail_sync;
	}

	dev->conf = *opts.conf;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
		SPDK_ERRLOG("Unsupported underlying device\n");
		goto fail_sync;
	}

	if (opts.conf->l2p_path) {
		dev->conf.l2p_path = strdup(opts.conf->l2p_path);
		if (!dev->conf.l2p_path) {
			rc = -ENOMEM;
			goto fail_sync;
		}
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(),
	 * so we don't have to clean up in each of the init functions.
	 */
	if (ftl_check_conf(dev, opts.conf)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	if (ftl_init_media_events_pool(dev)) {
		SPDK_ERRLOG("Unable to init media events pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	if (ftl_dev_init_zones(init_ctx)) {
		SPDK_ERRLOG("Failed to initialize zones\n");
		goto fail_async;
	}

	return 0;
fail_sync:
	ftl_dev_free_sync(dev);
	ftl_dev_free_init_ctx(init_ctx);
	return rc;
fail_async:
	ftl_init_fail(init_ctx);
	return 0;
}

static void
_ftl_halt_defrag(void *arg)
{
	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
}

static void
ftl_halt_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	/* Make sure core IO channel has already been released */
	if (dev->num_io_channels > 0) {
		spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
		return;
	}

	spdk_io_device_unregister(fini_ctx->dev, NULL);

	ftl_dev_free_sync(fini_ctx->dev);
	if (fini_ctx->cb_fn != NULL) {
		fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
	}

	ftl_dev_free_init_ctx(fini_ctx);
}

static void
ftl_put_io_channel_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	spdk_put_io_channel(dev->ioch);
	spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
}

static void
ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *fini_ctx = cb_arg;
	int rc = 0;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
		rc = -EIO;
	}

	fini_ctx->halt_complete_status = rc;
	spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
}

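/*
 * Halt poller: waits for the core poller to stop, then writes the
 * non-volatile cache metadata header (if a cache is attached, presumably
 * marking a clean shutdown) before releasing the core IO channel and
 * completing the halt on the caller's thread.
 */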
static int
ftl_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	if (!dev->core_poller) {
		spdk_poller_unregister(&fini_ctx->poller);

		if (ftl_dev_has_nv_cache(dev)) {
			ftl_nv_cache_write_header(&dev->nv_cache, true,
						  ftl_nv_cache_header_fini_cb, fini_ctx);
		} else {
			fini_ctx->halt_complete_status = 0;
			spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
		}
	}

	return SPDK_POLLER_BUSY;
}

static void
ftl_add_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	dev->halt = 1;

	_ftl_halt_defrag(dev);

	assert(!fini_ctx->poller);
	fini_ctx->poller = SPDK_POLLER_REGISTER(ftl_halt_poller, fini_ctx, 100);
}

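/*
 * Begin asynchronous device teardown: mark the device as halting exactly
 * once, then kick off the halt poller on the core thread. The completion
 * callback fires on the provided thread once all channels are released.
 */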
static int
ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
	     struct spdk_thread *thread)
{
	struct ftl_dev_init_ctx *fini_ctx;

	if (dev->halt_started) {
		return -EBUSY;
	}

	fini_ctx = calloc(1, sizeof(*fini_ctx));
	if (!fini_ctx) {
		return -ENOMEM;
	}

	dev->halt_started = true;

	fini_ctx->dev = dev;
	fini_ctx->cb_fn = cb_fn;
	fini_ctx->cb_arg = cb_arg;
	fini_ctx->thread = thread;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
	return 0;
}

int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	return ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
}

SPDK_LOG_REGISTER_COMPONENT(ftl_init)