xref: /spdk/lib/ftl/ftl_init.c (revision 03e3fc4f5835983a4e6602b4e770922e798ce263)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/thread.h"
37 #include "spdk/string.h"
38 #include "spdk/likely.h"
39 #include "spdk_internal/log.h"
40 #include "spdk/ftl.h"
43 #include "spdk/bdev_zone.h"
44 #include "spdk/bdev_module.h"
45 #include "spdk/config.h"
46 
47 #include "ftl_core.h"
48 #include "ftl_io.h"
49 #include "ftl_reloc.h"
50 #include "ftl_band.h"
51 #include "ftl_debug.h"
52 
53 #ifdef SPDK_CONFIG_PMDK
54 #include "libpmem.h"
55 #endif /* SPDK_CONFIG_PMDK */
56 
57 #define FTL_CORE_RING_SIZE	4096
58 #define FTL_INIT_TIMEOUT	30
59 #define FTL_NSID		1
60 #define FTL_ZONE_INFO_COUNT	64
61 
62 /* Dummy bdev module used to claim bdevs. */
63 static struct spdk_bdev_module g_ftl_bdev_module = {
64 	.name	= "ftl_lib",
65 };
66 
67 struct ftl_dev_init_ctx {
68 	/* Owner */
69 	struct spdk_ftl_dev		*dev;
70 	/* Initial arguments */
71 	struct spdk_ftl_dev_init_opts	opts;
72 	/* IO channel for retrieving zone info */
73 	struct spdk_io_channel		*ioch;
74 	/* Buffer for reading zone info */
75 	struct spdk_bdev_zone_info	info[FTL_ZONE_INFO_COUNT];
76 	/* Block offset of the zone currently being read */
77 	size_t				zone_id;
78 	/* User's callback */
79 	spdk_ftl_init_fn		cb_fn;
80 	/* Callback's argument */
81 	void				*cb_arg;
82 	/* Thread to call the callback on */
83 	struct spdk_thread		*thread;
84 	/* Poller to check if the device has been destroyed/initialized */
85 	struct spdk_poller		*poller;
86 	/* Status to return for halt completion callback */
87 	int				halt_complete_status;
88 };
89 
90 static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
91 static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
92 static const struct spdk_ftl_conf	g_default_conf = {
93 	.limits = {
94 		/* 5 free bands / 0% host writes */
95 		[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
96 		/* 10 free bands / 5% host writes */
97 		[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
98 		/* 20 free bands / 40% host writes */
99 		[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
100 		/* 40 free bands / 100% host writes - defrag starts running */
101 		[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
102 	},
103 	/* 10% valid blocks threshold - bands below it become defrag candidates */
104 	.invalid_thld = 10,
105 	/* 20% spare blocks */
106 	.lba_rsvd = 20,
107 	/* 6MiB write buffer per IO channel */
108 	.write_buffer_size = 6 * 1024 * 1024,
109 	/* 90% band fill threshold */
110 	.band_thld = 90,
111 	/* Max 32 IO depth per band relocate */
112 	.max_reloc_qdepth = 32,
113 	/* Max 3 active band relocates */
114 	.max_active_relocs = 3,
115 	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
116 	.user_io_pool_size = 2048,
117 	/*
118 	 * If clear, FTL will return an error when restoring after a dirty shutdown.
119 	 * If set, the last band will be padded and FTL will restore based only on closed
120 	 * bands - this will result in data loss after recovery.
121 	 */
122 	.allow_open_bands = false,
123 	.max_io_channels = 128,
124 	.nv_cache = {
125 		/* Maximum number of concurrent requests */
126 		.max_request_cnt = 2048,
127 		/* Maximum number of blocks per request */
128 		.max_request_size = 16,
129 	}
130 };
131 
132 static int
133 ftl_band_init_md(struct ftl_band *band)
134 {
135 	struct ftl_lba_map *lba_map = &band->lba_map;
136 
137 	lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
138 	if (!lba_map->vld) {
139 		return -ENOMEM;
140 	}
141 
142 	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
143 	ftl_band_md_clear(band);
144 	return 0;
145 }
146 
147 static int
148 ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
149 {
150 	size_t i;
151 
152 	if (conf->invalid_thld >= 100) {
153 		return -1;
154 	}
155 	if (conf->lba_rsvd >= 100) {
156 		return -1;
157 	}
158 	if (conf->lba_rsvd == 0) {
159 		return -1;
160 	}
161 	if (conf->write_buffer_size == 0) {
162 		return -1;
163 	}
164 	if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
165 		return -1;
166 	}
167 
168 	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
169 		if (conf->limits[i].limit > 100) {
170 			return -1;
171 		}
172 	}
173 
174 	return 0;
175 }
176 
177 static int
178 ftl_dev_init_bands(struct spdk_ftl_dev *dev)
179 {
180 	struct ftl_band *band, *pband;
181 	unsigned int i;
182 	int rc = 0;
183 
184 	LIST_INIT(&dev->free_bands);
185 	LIST_INIT(&dev->shut_bands);
186 
187 	dev->num_free = 0;
188 	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
189 	if (!dev->bands) {
190 		return -1;
191 	}
192 
193 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
194 		band = &dev->bands[i];
195 		band->id = i;
196 		band->dev = dev;
197 		band->state = FTL_BAND_STATE_CLOSED;
198 
199 		if (LIST_EMPTY(&dev->shut_bands)) {
200 			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
201 		} else {
202 			LIST_INSERT_AFTER(pband, band, list_entry);
203 		}
204 		pband = band;
205 
206 		CIRCLEQ_INIT(&band->zones);
207 		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
208 		if (!band->zone_buf) {
209 			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
210 			rc = -1;
211 			break;
212 		}
213 
214 		rc = ftl_band_init_md(band);
215 		if (rc) {
216 			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
217 			break;
218 		}
219 
220 		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
221 		if (!band->reloc_bitmap) {
222 			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
223 			break;
224 		}
225 	}
226 
227 	return rc;
228 }
229 
230 static void
231 ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
232 {
233 	struct spdk_ftl_dev *dev = event_ctx;
234 
235 	switch (type) {
236 	case SPDK_BDEV_EVENT_REMOVE:
237 		assert(0);
238 		break;
239 	case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
240 		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
241 		ftl_get_media_events(dev);
		break;
242 	default:
243 		break;
244 	}
245 }
246 
247 static int
248 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
249 {
250 	struct spdk_bdev *bdev;
251 	struct spdk_ftl_conf *conf = &dev->conf;
252 	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
253 	char pool_name[128];
254 	int rc;
255 
256 	if (!bdev_name) {
257 		return 0;
258 	}
259 
260 	bdev = spdk_bdev_get_by_name(bdev_name);
261 	if (!bdev) {
262 		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
263 		return -1;
264 	}
265 
266 	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
267 			       dev, &nv_cache->bdev_desc)) {
268 		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
269 		return -1;
270 	}
271 
272 	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
273 		spdk_bdev_close(nv_cache->bdev_desc);
274 		nv_cache->bdev_desc = NULL;
275 		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
276 		return -1;
277 	}
278 
279 	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
280 		     spdk_bdev_get_name(bdev));
281 
282 	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
283 		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
284 		return -1;
285 	}
286 
287 	if (!spdk_bdev_is_md_separate(bdev)) {
288 		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
289 			    spdk_bdev_get_name(bdev));
290 		return -1;
291 	}
292 
293 	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
294 		SPDK_ERRLOG("Metadata size of bdev %s is too small (%"PRIu32")\n",
295 			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
296 		return -1;
297 	}
298 
299 	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
300 		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
301 			    spdk_bdev_get_name(bdev));
302 		return -1;
303 	}
304 
305 	/* The cache needs to be capable of storing at least two full bands. This requirement comes
306 	 * from the fact that the cache works as a protection against power loss, so before the data
307 	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
308 	 * extra block is needed to store the header.
309 	 */
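	/* For illustration (hypothetical numbers): with bands of 262144 blocks,
	 * the cache bdev must provide at least 2 * 262144 + 1 = 524289 blocks.
	 */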
310 	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
311 		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
312 			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
313 			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
314 		return -1;
315 	}
316 
317 	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
318 	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
319 		return -1;
320 	}
321 
322 	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
323 						spdk_bdev_get_md_size(bdev) *
324 						conf->nv_cache.max_request_size,
325 						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
326 						SPDK_ENV_SOCKET_ID_ANY);
327 	if (!nv_cache->md_pool) {
328 		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
329 		return -1;
330 	}
331 
332 	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
333 	if (!nv_cache->dma_buf) {
334 		SPDK_ERRLOG("Memory allocation failure\n");
335 		return -1;
336 	}
337 
338 	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
339 		SPDK_ERRLOG("Failed to initialize cache lock\n");
340 		return -1;
341 	}
342 
343 	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
344 	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
345 	nv_cache->num_available = nv_cache->num_data_blocks;
346 	nv_cache->ready = false;
347 
348 	return 0;
349 }
350 
351 void
352 spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
353 {
354 	*conf = g_default_conf;
355 }
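
/*
 * A minimal caller-side sketch of how the defaults above are meant to be used
 * together with spdk_ftl_dev_init(). Illustrative only: "ftl0" and "nvme0n1"
 * are hypothetical names, and error handling is omitted.
 *
 *	static void
 *	init_cb(struct spdk_ftl_dev *dev, void *cb_arg, int status)
 *	{
 *		// dev is valid on success, NULL on failure
 *	}
 *
 *	struct spdk_ftl_conf conf;
 *	struct spdk_ftl_dev_init_opts opts = {};
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	conf.lba_rsvd = 25;
 *	opts.name = "ftl0";
 *	opts.base_bdev = "nvme0n1";
 *	opts.conf = &conf;
 *	opts.mode = SPDK_FTL_MODE_CREATE;
 *	opts.core_thread = spdk_get_thread();
 *	spdk_ftl_dev_init(&opts, init_cb, NULL);
 */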
356 
357 static void
358 ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
359 {
360 	struct ftl_lba_map_request *request = obj;
361 	struct spdk_ftl_dev *dev = opaque;
362 
363 	request->segments = spdk_bit_array_create(spdk_divide_round_up(
364 				    ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
365 }
366 
367 static int
368 ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
369 {
370 	char pool_name[128];
371 	int rc;
372 
373 	rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
374 	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
375 		SPDK_ERRLOG("Failed to create media pool name\n");
376 		return -1;
377 	}
378 
379 	dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
380 				 sizeof(struct ftl_media_event),
381 				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
382 				 SPDK_ENV_SOCKET_ID_ANY);
383 	if (!dev->media_events_pool) {
384 		SPDK_ERRLOG("Failed to create media events pool\n");
385 		return -1;
386 	}
387 
388 	return 0;
389 }
390 
391 static int
392 ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
393 {
394 #define POOL_NAME_LEN 128
395 	char pool_name[POOL_NAME_LEN];
396 	int rc;
397 
398 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
399 	if (rc < 0 || rc >= POOL_NAME_LEN) {
400 		return -ENAMETOOLONG;
401 	}
402 
403 	/* We need to reserve at least 2 buffers for band close / open sequence
404 	 * alone, plus additional (8) buffers for handling write errors.
405 	 * TODO: This memory pool is utilized only by the core thread - it introduces
406 	 * unnecessary overhead and should be replaced by a different data structure.
407 	 */
408 	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
409 					    ftl_lba_map_pool_elem_size(dev),
410 					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
411 					    SPDK_ENV_SOCKET_ID_ANY);
412 	if (!dev->lba_pool) {
413 		return -ENOMEM;
414 	}
415 
416 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
417 	if (rc < 0 || rc >= POOL_NAME_LEN) {
418 		return -ENAMETOOLONG;
419 	}
420 
421 	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
422 				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
423 				sizeof(struct ftl_lba_map_request),
424 				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
425 				SPDK_ENV_SOCKET_ID_ANY,
426 				ftl_lba_map_request_ctor,
427 				dev);
428 	if (!dev->lba_request_pool) {
429 		return -ENOMEM;
430 	}
431 
432 	return 0;
433 }
434 
435 static void
436 ftl_init_wptr_list(struct spdk_ftl_dev *dev)
437 {
438 	LIST_INIT(&dev->wptr_list);
439 	LIST_INIT(&dev->flush_list);
440 	LIST_INIT(&dev->band_flush_list);
441 }
442 
443 static size_t
444 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
445 {
446 	struct ftl_band *band;
447 	size_t seq = 0;
448 
449 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
450 		if (band->seq > seq) {
451 			seq = band->seq;
452 		}
453 	}
454 
455 	return seq;
456 }
457 
458 static void
459 _ftl_init_bands_state(void *ctx)
460 {
461 	struct ftl_band *band, *temp_band;
462 	struct spdk_ftl_dev *dev = ctx;
463 
464 	dev->seq = ftl_dev_band_max_seq(dev);
465 
466 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
467 		if (!band->lba_map.num_vld) {
468 			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
469 		}
470 	}
471 
472 	ftl_reloc_resume(dev->reloc);
473 	/* Clear the limit application counters, as they're incremented */
474 	/* incorrectly by the initialization code */
475 	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
476 }
477 
478 static int
479 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
480 {
481 	struct ftl_band *band;
482 	int cnt = 0;
483 
484 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
485 		if (band->num_zones && !band->lba_map.num_vld) {
486 			cnt++;
487 		}
488 	}
489 	return cnt;
490 }
491 
492 static int
493 ftl_init_bands_state(struct spdk_ftl_dev *dev)
494 {
495 	/* TODO: Should we abort initialization or expose a read-only device */
496 	/* if there are no free bands? */
497 	/* If we abort initialization, should we depend on the condition that */
498 	/* we have no free bands at all, or should we require some minimal number of */
499 	/* free bands? */
500 	if (!ftl_init_num_free_bands(dev)) {
501 		return -1;
502 	}
503 
504 	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
505 	return 0;
506 }
507 
508 static void
509 _ftl_dev_init_core_thread(void *ctx)
510 {
511 	struct spdk_ftl_dev *dev = ctx;
512 
513 	dev->core_poller = SPDK_POLLER_REGISTER(ftl_task_core, dev, 0);
514 	if (!dev->core_poller) {
515 		SPDK_ERRLOG("Unable to register core poller\n");
516 		assert(0);
517 	}
518 
519 	dev->ioch = spdk_get_io_channel(dev);
520 }
521 
522 static int
523 ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
524 {
525 	if (!opts->core_thread) {
526 		return -1;
527 	}
528 
529 	dev->core_thread = opts->core_thread;
530 
531 	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
532 	return 0;
533 }
534 
535 static int
536 ftl_dev_l2p_alloc_pmem(struct spdk_ftl_dev *dev, size_t l2p_size, const char *l2p_path)
537 {
538 #ifdef SPDK_CONFIG_PMDK
539 	int is_pmem;
540 
541 	if ((dev->l2p = pmem_map_file(l2p_path, 0,
542 				      0, 0, &dev->l2p_pmem_len, &is_pmem)) == NULL) {
543 		SPDK_ERRLOG("Failed to mmap l2p_path\n");
544 		return -1;
545 	}
546 
547 	if (!is_pmem) {
548 		SPDK_NOTICELOG("l2p_path mapped on non-pmem device\n");
549 	}
550 
551 	if (dev->l2p_pmem_len < l2p_size) {
552 		SPDK_ERRLOG("l2p_path file is too small\n");
553 		return -1;
554 	}
555 
556 	pmem_memset_persist(dev->l2p, FTL_ADDR_INVALID, l2p_size);
557 
558 	return 0;
559 #else /* SPDK_CONFIG_PMDK */
560 	SPDK_ERRLOG("Libpmem not available, cannot use pmem l2p_path\n");
561 	return -1;
562 #endif /* SPDK_CONFIG_PMDK */
563 }
564 
565 static int
566 ftl_dev_l2p_alloc_dram(struct spdk_ftl_dev *dev, size_t l2p_size)
567 {
568 	dev->l2p = malloc(l2p_size);
569 	if (!dev->l2p) {
570 		SPDK_ERRLOG("Failed to allocate l2p table\n");
571 		return -1;
572 	}
573 
574 	memset(dev->l2p, FTL_ADDR_INVALID, l2p_size);
575 
576 	return 0;
577 }
578 
579 static int
580 ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
581 {
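	/* A note on sizing: 8-byte L2P entries are used unless addresses fit in
	 * fewer than 32 bits; addr_len itself is derived from the base bdev's
	 * block count in ftl_dev_init_base_bdev().
	 */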
582 	size_t addr_size = dev->addr_len >= 32 ? 8 : 4;
583 	size_t l2p_size = dev->num_lbas * addr_size;
584 	const char *l2p_path = dev->conf.l2p_path;
585 
586 	if (dev->num_lbas == 0) {
587 		SPDK_ERRLOG("Invalid l2p table size\n");
588 		return -1;
589 	}
590 
591 	if (dev->l2p) {
592 		SPDK_ERRLOG("L2p table already allocated\n");
593 		return -1;
594 	}
595 
596 	dev->l2p_pmem_len = 0;
597 	if (l2p_path) {
598 		return ftl_dev_l2p_alloc_pmem(dev, l2p_size, l2p_path);
599 	} else {
600 		return ftl_dev_l2p_alloc_dram(dev, l2p_size);
601 	}
602 }
603 
604 static void
605 ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
606 {
607 	if (!init_ctx) {
608 		return;
609 	}
610 
611 	if (init_ctx->ioch) {
612 		spdk_put_io_channel(init_ctx->ioch);
613 	}
614 
615 	free(init_ctx);
616 }
617 
618 static void
619 ftl_call_init_complete_cb(void *ctx)
620 {
621 	struct ftl_dev_init_ctx *init_ctx = ctx;
622 	struct spdk_ftl_dev *dev = init_ctx->dev;
623 
624 	if (init_ctx->cb_fn != NULL) {
625 		init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
626 	}
627 
628 	ftl_dev_free_init_ctx(init_ctx);
629 }
630 
631 static void
632 ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
633 {
634 	struct spdk_ftl_dev *dev = init_ctx->dev;
635 
636 	pthread_mutex_lock(&g_ftl_queue_lock);
637 	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
638 	pthread_mutex_unlock(&g_ftl_queue_lock);
639 
640 	dev->initialized = 1;
641 
642 	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
643 }
644 
645 static void
646 ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
647 {
648 	struct ftl_dev_init_ctx *init_ctx = ctx;
649 
650 	if (init_ctx->cb_fn != NULL) {
651 		init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
652 	}
653 
654 	ftl_dev_free_init_ctx(init_ctx);
655 }
656 
657 static int ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
658 			struct spdk_thread *thread);
659 
660 static void
661 ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
662 {
663 	if (ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
664 		SPDK_ERRLOG("Unable to free the device\n");
665 		assert(0);
666 	}
667 }
668 
669 static void
670 ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
671 {
672 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
673 	struct spdk_ftl_dev *dev = init_ctx->dev;
674 
675 	spdk_bdev_free_io(bdev_io);
676 	if (spdk_unlikely(!success)) {
677 		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
678 		ftl_init_fail(init_ctx);
679 		return;
680 	}
681 
682 	dev->nv_cache.ready = true;
683 	ftl_init_complete(init_ctx);
684 }
685 
686 static void
687 ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
688 {
689 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
690 	struct spdk_ftl_dev *dev = init_ctx->dev;
691 	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
692 
693 	spdk_bdev_free_io(bdev_io);
694 	if (spdk_unlikely(!success)) {
695 		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
696 		ftl_init_fail(init_ctx);
697 		return;
698 	}
699 
700 	nv_cache->phase = 1;
701 	if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
702 		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
703 		ftl_init_fail(init_ctx);
704 	}
705 }
706 
707 static void
708 _ftl_nv_cache_scrub(void *ctx)
709 {
710 	struct ftl_dev_init_ctx *init_ctx = ctx;
711 	struct spdk_ftl_dev *dev = init_ctx->dev;
712 	int rc;
713 
714 	rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);
715 
716 	if (spdk_unlikely(rc != 0)) {
717 		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
718 			    spdk_strerror(-rc));
719 		ftl_init_fail(init_ctx);
720 	}
721 }
722 
723 static int
724 ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
725 {
726 	struct spdk_ftl_dev *dev = init_ctx->dev;
727 	struct spdk_ftl_conf *conf = &dev->conf;
728 	size_t i;
729 
730 	spdk_uuid_generate(&dev->uuid);
731 
732 	dev->num_lbas = 0;
733 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
734 		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
735 	}
736 
737 	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
738 
739 	if (ftl_dev_l2p_alloc(dev)) {
740 		SPDK_ERRLOG("Unable to init l2p table\n");
741 		return -1;
742 	}
743 
744 	if (ftl_init_bands_state(dev)) {
745 		SPDK_ERRLOG("Unable to finish the initialization\n");
746 		return -1;
747 	}
748 
749 	if (!ftl_dev_has_nv_cache(dev)) {
750 		ftl_init_complete(init_ctx);
751 	} else {
752 		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
753 	}
754 
755 	return 0;
756 }
757 
758 static void
759 ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
760 {
761 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
762 
763 	if (spdk_unlikely(status != 0)) {
764 		SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
765 		ftl_init_fail(init_ctx);
766 		return;
767 	}
768 
769 	ftl_init_complete(init_ctx);
770 }
771 
772 static void
773 ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
774 {
775 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
776 	struct spdk_ftl_dev *dev = init_ctx->dev;
777 
778 	if (status) {
779 		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
780 		ftl_init_fail(init_ctx);
781 		return;
782 	}
783 
784 	if (ftl_init_bands_state(dev)) {
785 		SPDK_ERRLOG("Unable to finish the initialization\n");
786 		ftl_init_fail(init_ctx);
787 		return;
788 	}
789 
790 	if (!ftl_dev_has_nv_cache(dev)) {
791 		ftl_init_complete(init_ctx);
792 		return;
793 	}
794 
795 	ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
796 }
797 
798 static void
799 ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
800 {
801 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
802 
803 	if (status) {
804 		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
805 		goto error;
806 	}
807 
808 	/* After the metadata is read it should be possible to allocate the L2P */
809 	if (ftl_dev_l2p_alloc(init_ctx->dev)) {
810 		SPDK_ERRLOG("Failed to allocate the L2P\n");
811 		goto error;
812 	}
813 
814 	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
815 		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
816 		goto error;
817 	}
818 
819 	return;
820 error:
821 	ftl_init_fail(init_ctx);
822 }
823 
824 static int
825 ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
826 {
827 	struct spdk_ftl_dev *dev = init_ctx->dev;
828 
829 	dev->uuid = init_ctx->opts.uuid;
830 
831 	if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
832 		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
833 		return -1;
834 	}
835 
836 	return 0;
837 }
838 
839 static void
840 ftl_dev_update_bands(struct spdk_ftl_dev *dev)
841 {
842 	struct ftl_band *band, *temp_band;
843 	size_t i;
844 
845 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
846 		band = &dev->bands[i];
847 		band->tail_md_addr = ftl_band_tail_md_addr(band);
848 	}
849 
850 	/* Remove band from shut_bands list to prevent further processing */
851 	/* if all blocks on this band are bad */
852 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
853 		if (!band->num_zones) {
854 			dev->num_bands--;
855 			LIST_REMOVE(band, list_entry);
856 		}
857 	}
858 }
859 
860 static void
861 ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
862 {
863 	struct spdk_ftl_dev *dev = init_ctx->dev;
864 
865 	ftl_dev_update_bands(dev);
866 
867 	if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
868 		SPDK_ERRLOG("Unable to initialize device thread\n");
869 		ftl_init_fail(init_ctx);
870 		return;
871 	}
872 
873 	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
874 		if (ftl_setup_initial_state(init_ctx)) {
875 			SPDK_ERRLOG("Failed to setup initial state of the device\n");
876 			ftl_init_fail(init_ctx);
877 			return;
878 		}
879 	} else {
880 		if (ftl_restore_state(init_ctx)) {
881 			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
882 			ftl_init_fail(init_ctx);
883 			return;
884 		}
885 	}
886 }
887 
888 static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);
889 
890 static void
891 ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
892 {
893 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
894 	struct spdk_ftl_dev *dev = init_ctx->dev;
895 	struct ftl_band *band;
896 	struct ftl_zone *zone;
897 	struct ftl_addr addr;
898 	size_t i, zones_left, num_zones;
899 
900 	spdk_bdev_free_io(bdev_io);
901 
902 	if (spdk_unlikely(!success)) {
903 		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
904 		ftl_init_fail(init_ctx);
905 		return;
906 	}
907 
908 	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
909 	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
910 
911 	for (i = 0; i < num_zones; ++i) {
912 		addr.offset = init_ctx->info[i].zone_id;
913 		band = &dev->bands[ftl_addr_get_band(dev, addr)];
914 		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
915 		zone->info = init_ctx->info[i];
916 
917 		/* TODO: add support for zone capacity less than zone size */
918 		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
919 			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
920 			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
921 				    "zone id: %"PRIu64"\n", init_ctx->zone_id);
922 		}
923 
924 		/* Set write pointer to the last block plus one for zone in full state */
925 		if (zone->info.state == SPDK_BDEV_ZONE_STATE_FULL) {
926 			zone->info.write_pointer = zone->info.zone_id + zone->info.capacity;
927 		}
928 
929 		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
930 			band->num_zones++;
931 			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
932 		}
933 	}
934 
935 	init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);
936 
937 	ftl_dev_get_zone_info(init_ctx);
938 }
939 
940 static void
941 ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
942 {
943 	struct spdk_ftl_dev *dev = init_ctx->dev;
944 	size_t zones_left, num_zones;
945 	int rc;
946 
947 	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
948 	if (zones_left == 0) {
949 		ftl_dev_init_state(init_ctx);
950 		return;
951 	}
952 
953 	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
954 
955 	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
956 				     init_ctx->zone_id, num_zones, init_ctx->info,
957 				     ftl_dev_get_zone_info_cb, init_ctx);
958 
959 	if (spdk_unlikely(rc != 0)) {
960 		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
961 		ftl_init_fail(init_ctx);
962 	}
963 }
964 
965 static int
966 ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
967 {
968 	struct spdk_ftl_dev *dev = init_ctx->dev;
969 
970 	init_ctx->zone_id = 0;
971 	init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
972 	if (!init_ctx->ioch) {
973 		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
974 		return -1;
975 	}
976 
977 	ftl_dev_get_zone_info(init_ctx);
978 
979 	return 0;
980 }
981 
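/*
 * The context registered with spdk_io_device_register() is just a thin wrapper
 * holding a pointer to the actual ftl_io_channel. The real channel is
 * allocated separately since its registration and teardown are completed
 * asynchronously on the core thread (see ftl_io_channel_register() and
 * ftl_io_channel_unregister() below), so it can outlive the per-thread
 * context - an inference from the create/destroy paths, not a documented
 * contract.
 */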
982 struct _ftl_io_channel {
983 	struct ftl_io_channel *ioch;
984 };
985 
986 struct ftl_io_channel *
987 ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
988 {
989 	struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);
990 
991 	return _ioch->ioch;
992 }
993 
994 static void
995 ftl_io_channel_register(void *ctx)
996 {
997 	struct ftl_io_channel *ioch = ctx;
998 	struct spdk_ftl_dev *dev = ioch->dev;
999 	uint32_t ioch_index;
1000 
1001 	for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
1002 		if (dev->ioch_array[ioch_index] == NULL) {
1003 			dev->ioch_array[ioch_index] = ioch;
1004 			ioch->index = ioch_index;
1005 			break;
1006 		}
1007 	}
1008 
1009 	assert(ioch_index < dev->conf.max_io_channels);
1010 	TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
1011 }
1012 
1013 static int
1014 ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
1015 {
1016 	struct spdk_ftl_dev *dev = ioch->dev;
1017 	struct ftl_wbuf_entry *entry;
1018 	uint32_t i;
1019 	int rc;
1020 
1021 	ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
1022 	ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
1023 	if (ioch->wbuf_entries == NULL) {
1024 		SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
1025 		return -1;
1026 	}
1027 
1028 	ioch->qdepth_limit = ioch->num_entries;
1029 	ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
1030 					  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
1031 	if (ioch->wbuf_payload == NULL) {
1032 		SPDK_ERRLOG("Failed to allocate write buffer payload\n");
1033 		goto error_entries;
1034 	}
1035 
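	/* The free and submit queues are sized to num_entries + 1 and rounded up
	 * to a power of two; this assumes spdk_ring follows classic DPDK ring
	 * semantics, where a ring of size N holds at most N - 1 elements.
	 */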
1036 	ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
1037 					    spdk_align32pow2(ioch->num_entries + 1),
1038 					    SPDK_ENV_SOCKET_ID_ANY);
1039 	if (ioch->free_queue == NULL) {
1040 		SPDK_ERRLOG("Failed to allocate free queue\n");
1041 		goto error_payload;
1042 	}
1043 
1044 	ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
1045 					      spdk_align32pow2(ioch->num_entries + 1),
1046 					      SPDK_ENV_SOCKET_ID_ANY);
1047 	if (ioch->submit_queue == NULL) {
1048 		SPDK_ERRLOG("Failed to allocate submit queue\n");
1049 		goto error_free_queue;
1050 	}
1051 
1052 	for (i = 0; i < ioch->num_entries; ++i) {
1053 		entry = &ioch->wbuf_entries[i];
1054 		entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
1055 		entry->ioch = ioch;
1056 		entry->index = i;
1057 		entry->addr.offset = FTL_ADDR_INVALID;
1058 
1059 		rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
1060 		if (rc != 0) {
1061 			SPDK_ERRLOG("Failed to initialize spinlock\n");
1062 			goto error_spinlock;
1063 		}
1064 
1065 		spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
1066 	}
1067 
1068 	return 0;
1069 error_spinlock:
1070 	for (; i > 0; --i) {
1071 		pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
1072 	}
1073 
1074 	spdk_ring_free(ioch->submit_queue);
1075 error_free_queue:
1076 	spdk_ring_free(ioch->free_queue);
1077 error_payload:
1078 	spdk_free(ioch->wbuf_payload);
1079 error_entries:
1080 	free(ioch->wbuf_entries);
1081 
1082 	return -1;
1083 }
1084 
1085 static int
1086 ftl_io_channel_create_cb(void *io_device, void *ctx)
1087 {
1088 	struct spdk_ftl_dev *dev = io_device;
1089 	struct _ftl_io_channel *_ioch = ctx;
1090 	struct ftl_io_channel *ioch;
1091 	uint32_t num_io_channels;
1092 	char mempool_name[32];
1093 	int rc;
1094 
1095 	num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1096 	if (num_io_channels >= dev->conf.max_io_channels) {
1097 		SPDK_ERRLOG("Reached maximum number of IO channels\n");
1098 		__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1099 		return -1;
1100 	}
1101 
1102 	ioch = calloc(1, sizeof(*ioch));
1103 	if (ioch == NULL) {
1104 		SPDK_ERRLOG("Failed to allocate IO channel\n");
1105 		return -1;
1106 	}
1107 
1108 	rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
1109 	if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
1110 		SPDK_ERRLOG("Failed to create IO channel pool name\n");
1111 		free(ioch);
1112 		return -1;
1113 	}
1114 
1115 	ioch->cache_ioch = NULL;
1116 	ioch->index = FTL_IO_CHANNEL_INDEX_INVALID;
1117 	ioch->dev = dev;
1118 	ioch->elem_size = sizeof(struct ftl_md_io);
1119 	ioch->io_pool = spdk_mempool_create(mempool_name,
1120 					    dev->conf.user_io_pool_size,
1121 					    ioch->elem_size,
1122 					    0,
1123 					    SPDK_ENV_SOCKET_ID_ANY);
1124 	if (!ioch->io_pool) {
1125 		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
1126 		free(ioch);
1127 		return -1;
1128 	}
1129 
1130 	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
1131 	if (!ioch->base_ioch) {
1132 		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
1133 		goto fail_ioch;
1134 	}
1135 
1136 	if (ftl_dev_has_nv_cache(dev)) {
1137 		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
1138 		if (!ioch->cache_ioch) {
1139 			SPDK_ERRLOG("Failed to create cache IO channel\n");
1140 			goto fail_cache;
1141 		}
1142 	}
1143 
1144 	TAILQ_INIT(&ioch->write_cmpl_queue);
1145 	TAILQ_INIT(&ioch->retry_queue);
1146 	ioch->poller = SPDK_POLLER_REGISTER(ftl_io_channel_poll, ioch, 0);
1147 	if (!ioch->poller) {
1148 		SPDK_ERRLOG("Failed to register IO channel poller\n");
1149 		goto fail_poller;
1150 	}
1151 
1152 	if (ftl_io_channel_init_wbuf(ioch)) {
1153 		SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
1154 		goto fail_wbuf;
1155 	}
1156 
1157 	_ioch->ioch = ioch;
1158 
1159 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);
1160 
1161 	return 0;
1162 fail_wbuf:
1163 	spdk_poller_unregister(&ioch->poller);
1164 fail_poller:
1165 	if (ioch->cache_ioch) {
1166 		spdk_put_io_channel(ioch->cache_ioch);
1167 	}
1168 fail_cache:
1169 	spdk_put_io_channel(ioch->base_ioch);
1170 fail_ioch:
1171 	spdk_mempool_free(ioch->io_pool);
1172 	free(ioch);
1173 
1174 	return -1;
1175 }
1176 
1177 static void
1178 ftl_io_channel_unregister(void *ctx)
1179 {
1180 	struct ftl_io_channel *ioch = ctx;
1181 	struct spdk_ftl_dev *dev = ioch->dev;
1182 	uint32_t i, num_io_channels __attribute__((unused));
1183 
1184 	assert(ioch->index < dev->conf.max_io_channels);
1185 	assert(dev->ioch_array[ioch->index] == ioch);
1186 
1187 	dev->ioch_array[ioch->index] = NULL;
1188 	TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);
1189 
1190 	num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1191 	assert(num_io_channels > 0);
1192 
1193 	for (i = 0; i < ioch->num_entries; ++i) {
1194 		pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
1195 	}
1196 
1197 	spdk_mempool_free(ioch->io_pool);
1198 	spdk_ring_free(ioch->free_queue);
1199 	spdk_ring_free(ioch->submit_queue);
1200 	spdk_free(ioch->wbuf_payload);
1201 	free(ioch->wbuf_entries);
1202 	free(ioch);
1203 }
1204 
1205 static void
1206 _ftl_io_channel_destroy_cb(void *ctx)
1207 {
1208 	struct ftl_io_channel *ioch = ctx;
1209 	struct spdk_ftl_dev *dev = ioch->dev;
1210 
1211 	/* Do not destroy the channel if some of its entries are still in use */
1212 	if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
1213 		spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
1214 		return;
1215 	}
1216 
1217 	spdk_poller_unregister(&ioch->poller);
1218 
1219 	spdk_put_io_channel(ioch->base_ioch);
1220 	if (ioch->cache_ioch) {
1221 		spdk_put_io_channel(ioch->cache_ioch);
1222 	}
1223 
1224 	ioch->base_ioch = NULL;
1225 	ioch->cache_ioch = NULL;
1226 
1227 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
1228 }
1229 
1230 static void
1231 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
1232 {
1233 	struct _ftl_io_channel *_ioch = ctx;
1234 	struct ftl_io_channel *ioch = _ioch->ioch;
1235 
1236 	/* Mark the IO channel as being flushed to force out any unwritten entries */
1237 	ioch->flush = true;
1238 
1239 	_ftl_io_channel_destroy_cb(ioch);
1240 }
1241 
1242 static int
1243 ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
1244 {
1245 	struct ftl_batch *batch;
1246 	uint32_t i;
1247 
1248 	/* Round the number of IO channels up to the nearest power of 2 to allow for easy addr bit shifts */
1249 	dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
1250 	dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);
1251 
1252 	dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
1253 	if (!dev->ioch_array) {
1254 		SPDK_ERRLOG("Failed to allocate IO channel array\n");
1255 		return -1;
1256 	}
1257 
1258 	if (dev->md_size > 0) {
1259 		dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
1260 					   dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
1261 					   SPDK_MALLOC_DMA);
1262 		if (dev->md_buf == NULL) {
1263 			SPDK_ERRLOG("Failed to allocate metadata buffer\n");
1264 			return -1;
1265 		}
1266 	}
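	/* Layout sketch: md_buf consists of FTL_BATCH_COUNT contiguous slices of
	 * xfer_size * md_size bytes each; batch i gets the slice starting at
	 * byte offset i * xfer_size * md_size (see the loop below).
	 */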
1267 
1268 	dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
1269 	if (!dev->iov_buf) {
1270 		SPDK_ERRLOG("Failed to allocate iovec buffer\n");
1271 		return -1;
1272 	}
1273 
1274 	TAILQ_INIT(&dev->free_batches);
1275 	TAILQ_INIT(&dev->pending_batches);
1276 	TAILQ_INIT(&dev->ioch_queue);
1277 
1278 	for (i = 0; i < FTL_BATCH_COUNT; ++i) {
1279 		batch = &dev->batch_array[i];
1280 		batch->iov = &dev->iov_buf[i * dev->xfer_size];
1281 		batch->num_entries = 0;
1282 		batch->index = i;
1283 		TAILQ_INIT(&batch->entries);
1284 		if (dev->md_buf != NULL) {
1285 			batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
1286 		}
1287 
1288 		TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
1289 	}
1290 
1291 	dev->num_io_channels = 0;
1292 
1293 	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
1294 				sizeof(struct _ftl_io_channel),
1295 				NULL);
1296 
1297 	return 0;
1298 }
1299 
1300 static int
1301 ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
1302 {
1303 	uint32_t block_size;
1304 	uint64_t num_blocks;
1305 	struct spdk_bdev *bdev;
1306 
1307 	bdev = spdk_bdev_get_by_name(bdev_name);
1308 	if (!bdev) {
1309 		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
1310 		return -1;
1311 	}
1312 
1313 	if (!spdk_bdev_is_zoned(bdev)) {
1314 		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
1315 			    spdk_bdev_get_name(bdev));
1316 		return -1;
1317 	}
1318 
1319 	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
1320 			       dev, &dev->base_bdev_desc)) {
1321 		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
1322 		return -1;
1323 	}
1324 
1325 	if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
1326 		spdk_bdev_close(dev->base_bdev_desc);
1327 		dev->base_bdev_desc = NULL;
1328 		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
1329 		return -1;
1330 	}
1331 
1332 	dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
1333 	dev->md_size = spdk_bdev_get_md_size(bdev);
1334 
1335 	block_size = spdk_bdev_get_block_size(bdev);
1336 	if (block_size != FTL_BLOCK_SIZE) {
1337 		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
1338 		return -1;
1339 	}
1340 
1341 	num_blocks = spdk_bdev_get_num_blocks(bdev);
1342 	if (num_blocks % ftl_get_num_punits(dev)) {
1343 		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be a multiple "
1344 			    "of the optimal number of zones.\n");
1345 		return -1;
1346 	}
1347 
1348 	if (ftl_is_append_supported(dev) &&
1349 	    !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
1350 		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
1351 			    spdk_bdev_get_name(bdev));
1352 		return -1;
1353 	}
1354 
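	/* A band stripes across one zone from every parallel unit, so the number
	 * of bands is the total block count divided by the blocks in one such
	 * stripe (num_punits * zone_size).
	 */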
1355 	dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
1356 	dev->addr_len = spdk_u64log2(num_blocks) + 1;
1357 
1358 	return 0;
1359 }
1360 
1361 static void
1362 ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
1363 {
1364 	struct ftl_lba_map_request *request = obj;
1365 
1366 	spdk_bit_array_free(&request->segments);
1367 }
1368 
1369 static void
1370 ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
1371 {
1372 	if (!bdev_desc) {
1373 		return;
1374 	}
1375 
1376 	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
1377 	spdk_bdev_close(bdev_desc);
1378 }
1379 
1380 static void
1381 ftl_dev_free_sync(struct spdk_ftl_dev *dev)
1382 {
1383 	struct spdk_ftl_dev *iter;
1384 	size_t i;
1385 
1386 	if (!dev) {
1387 		return;
1388 	}
1389 
1390 	pthread_mutex_lock(&g_ftl_queue_lock);
1391 	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
1392 		if (iter == dev) {
1393 			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
1394 			break;
1395 		}
1396 	}
1397 	pthread_mutex_unlock(&g_ftl_queue_lock);
1398 
1399 	assert(LIST_EMPTY(&dev->wptr_list));
1400 	assert(dev->current_batch == NULL);
1401 
1402 	ftl_dev_dump_bands(dev);
1403 	ftl_dev_dump_stats(dev);
1404 
1405 	if (dev->bands) {
1406 		for (i = 0; i < ftl_get_num_bands(dev); ++i) {
1407 			free(dev->bands[i].zone_buf);
1408 			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
1409 			spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
1410 		}
1411 	}
1412 
1413 	spdk_dma_free(dev->nv_cache.dma_buf);
1414 
1415 	spdk_mempool_free(dev->lba_pool);
1416 	spdk_mempool_free(dev->nv_cache.md_pool);
1417 	spdk_mempool_free(dev->media_events_pool);
1418 	if (dev->lba_request_pool) {
1419 		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
1420 	}
1421 	spdk_mempool_free(dev->lba_request_pool);
1422 
1423 	ftl_reloc_free(dev->reloc);
1424 
1425 	ftl_release_bdev(dev->nv_cache.bdev_desc);
1426 	ftl_release_bdev(dev->base_bdev_desc);
1427 
1428 	spdk_free(dev->md_buf);
1429 
1430 	assert(dev->num_io_channels == 0);
1431 	free(dev->ioch_array);
1432 	free(dev->iov_buf);
1433 	free(dev->name);
1434 	free(dev->bands);
1435 	if (dev->l2p_pmem_len != 0) {
1436 #ifdef SPDK_CONFIG_PMDK
1437 		pmem_unmap(dev->l2p, dev->l2p_pmem_len);
1438 #endif /* SPDK_CONFIG_PMDK */
1439 	} else {
1440 		free(dev->l2p);
1441 	}
1442 	free((char *)dev->conf.l2p_path);
1443 	free(dev);
1444 }
1445 
1446 int
1447 spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
1448 {
1449 	struct spdk_ftl_dev *dev;
1450 	struct spdk_ftl_dev_init_opts opts = *_opts;
1451 	struct ftl_dev_init_ctx *init_ctx = NULL;
1452 	int rc = -ENOMEM;
1453 
1454 	dev = calloc(1, sizeof(*dev));
1455 	if (!dev) {
1456 		return -ENOMEM;
1457 	}
1458 
1459 	init_ctx = calloc(1, sizeof(*init_ctx));
1460 	if (!init_ctx) {
1461 		goto fail_sync;
1462 	}
1463 
1464 	init_ctx->dev = dev;
1465 	init_ctx->opts = *_opts;
1466 	init_ctx->cb_fn = cb_fn;
1467 	init_ctx->cb_arg = cb_arg;
1468 	init_ctx->thread = spdk_get_thread();
1469 
1470 	if (!opts.conf) {
1471 		opts.conf = &g_default_conf;
1472 	}
1473 
1474 	if (!opts.base_bdev) {
1475 		SPDK_ERRLOG("Lack of underlying device in configuration\n");
1476 		rc = -EINVAL;
1477 		goto fail_sync;
1478 	}
1479 
1480 	dev->conf = *opts.conf;
1481 	dev->limit = SPDK_FTL_LIMIT_MAX;
1482 
1483 	dev->name = strdup(opts.name);
1484 	if (!dev->name) {
1485 		SPDK_ERRLOG("Unable to set device name\n");
1486 		goto fail_sync;
1487 	}
1488 
1489 	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
1490 		SPDK_ERRLOG("Unsupported underlying device\n");
1491 		goto fail_sync;
1492 	}
1493 
1494 	if (opts.conf->l2p_path) {
1495 		dev->conf.l2p_path = strdup(opts.conf->l2p_path);
1496 		if (!dev->conf.l2p_path) {
1497 			rc = -ENOMEM;
1498 			goto fail_sync;
1499 		}
1500 	}
1501 
1502 	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
1503 	/* so we don't have to clean up in each of the init functions. */
1504 	if (ftl_check_conf(dev, opts.conf)) {
1505 		SPDK_ERRLOG("Invalid device configuration\n");
1506 		goto fail_sync;
1507 	}
1508 
1509 	if (ftl_init_lba_map_pools(dev)) {
1510 		SPDK_ERRLOG("Unable to init LBA map pools\n");
1511 		goto fail_sync;
1512 	}
1513 
1514 	if (ftl_init_media_events_pool(dev)) {
1515 		SPDK_ERRLOG("Unable to init media events pools\n");
1516 		goto fail_sync;
1517 	}
1518 
1519 	ftl_init_wptr_list(dev);
1520 
1521 	if (ftl_dev_init_bands(dev)) {
1522 		SPDK_ERRLOG("Unable to initialize band array\n");
1523 		goto fail_sync;
1524 	}
1525 
1526 	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
1527 		SPDK_ERRLOG("Unable to initialize persistent cache\n");
1528 		goto fail_sync;
1529 	}
1530 
1531 	dev->reloc = ftl_reloc_init(dev);
1532 	if (!dev->reloc) {
1533 		SPDK_ERRLOG("Unable to initialize reloc structures\n");
1534 		goto fail_sync;
1535 	}
1536 
1537 	if (ftl_dev_init_io_channel(dev)) {
1538 		SPDK_ERRLOG("Unable to initialize IO channels\n");
1539 		goto fail_sync;
1540 	}
1541 
1542 	if (ftl_dev_init_zones(init_ctx)) {
1543 		SPDK_ERRLOG("Failed to initialize zones\n");
1544 		goto fail_async;
1545 	}
1546 
1547 	return 0;
1548 fail_sync:
1549 	ftl_dev_free_sync(dev);
1550 	ftl_dev_free_init_ctx(init_ctx);
1551 	return rc;
1552 fail_async:
1553 	ftl_init_fail(init_ctx);
1554 	return 0;
1555 }
1556 
1557 static void
1558 _ftl_halt_defrag(void *arg)
1559 {
1560 	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1561 }
1562 
1563 static void
1564 ftl_halt_complete_cb(void *ctx)
1565 {
1566 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1567 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1568 
1569 	/* Make sure core IO channel has already been released */
1570 	if (dev->num_io_channels > 0) {
1571 		spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
1572 		return;
1573 	}
1574 
1575 	spdk_io_device_unregister(fini_ctx->dev, NULL);
1576 
1577 	ftl_dev_free_sync(fini_ctx->dev);
1578 	if (fini_ctx->cb_fn != NULL) {
1579 		fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
1580 	}
1581 
1582 	ftl_dev_free_init_ctx(fini_ctx);
1583 }
1584 
1585 static void
1586 ftl_put_io_channel_cb(void *ctx)
1587 {
1588 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1589 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1590 
1591 	spdk_put_io_channel(dev->ioch);
1592 	spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
1593 }
1594 
1595 static void
1596 ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
1597 {
1598 	struct ftl_dev_init_ctx *fini_ctx = cb_arg;
1599 	int rc = 0;
1600 
1601 	spdk_bdev_free_io(bdev_io);
1602 	if (spdk_unlikely(!success)) {
1603 		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
1604 		rc = -EIO;
1605 	}
1606 
1607 	fini_ctx->halt_complete_status = rc;
1608 	spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
1609 }
1610 
1611 static int
1612 ftl_halt_poller(void *ctx)
1613 {
1614 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1615 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1616 
1617 	if (!dev->core_poller) {
1618 		spdk_poller_unregister(&fini_ctx->poller);
1619 
1620 		if (ftl_dev_has_nv_cache(dev)) {
1621 			ftl_nv_cache_write_header(&dev->nv_cache, true,
1622 						  ftl_nv_cache_header_fini_cb, fini_ctx);
1623 		} else {
1624 			fini_ctx->halt_complete_status = 0;
1625 			spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
1626 		}
1627 	}
1628 
1629 	return 0;
1630 }
1631 
1632 static void
1633 ftl_add_halt_poller(void *ctx)
1634 {
1635 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1636 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1637 
1638 	dev->halt = 1;
1639 
1640 	_ftl_halt_defrag(dev);
1641 
1642 	assert(!fini_ctx->poller);
1643 	fini_ctx->poller = SPDK_POLLER_REGISTER(ftl_halt_poller, fini_ctx, 100);
1644 }
1645 
1646 static int
1647 ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
1648 	     struct spdk_thread *thread)
1649 {
1650 	struct ftl_dev_init_ctx *fini_ctx;
1651 
1652 	if (dev->halt_started) {
1653 		return -EBUSY;
1654 	}
1655 	dev->halt_started = true;
1656 
1657 	fini_ctx = calloc(1, sizeof(*fini_ctx));
1658 	if (!fini_ctx) {
1659 		return -ENOMEM;
1660 	}
1661 
1662 	fini_ctx->dev = dev;
1663 	fini_ctx->cb_fn = cb_fn;
1664 	fini_ctx->cb_arg = cb_arg;
1665 	fini_ctx->thread = thread;
1666 
1667 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
1668 	return 0;
1669 }
1670 
1671 int
1672 spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
1673 {
1674 	return ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
1675 }
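
/*
 * A teardown sketch (illustrative, error handling omitted): must be called
 * from an SPDK thread; the callback always receives a NULL device pointer
 * along with the halt status.
 *
 *	static void
 *	free_cb(struct spdk_ftl_dev *dev, void *cb_arg, int status)
 *	{
 *		assert(dev == NULL);
 *	}
 *
 *	spdk_ftl_dev_free(dev, free_cb, NULL);
 */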
1676 
1677 SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)
1678