xref: /spdk/lib/ftl/ftl_init.c (revision 0ed85362c8132a2d1927757fbcade66b6660d26a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/thread.h"
37 #include "spdk/string.h"
38 #include "spdk/likely.h"
39 #include "spdk_internal/log.h"
40 #include "spdk/ftl.h"
43 #include "spdk/bdev_zone.h"
44 #include "spdk/bdev_module.h"
45 #include "spdk/config.h"
46 
47 #include "ftl_core.h"
48 #include "ftl_io.h"
49 #include "ftl_reloc.h"
50 #include "ftl_band.h"
51 #include "ftl_debug.h"
52 
53 #ifdef SPDK_CONFIG_PMDK
54 #include "libpmem.h"
55 #endif /* SPDK_CONFIG_PMDK */
56 
57 #define FTL_CORE_RING_SIZE	4096
58 #define FTL_INIT_TIMEOUT	30
59 #define FTL_NSID		1
60 #define FTL_ZONE_INFO_COUNT	64
61 
62 /* Dummy bdev module used to claim bdevs. */
63 static struct spdk_bdev_module g_ftl_bdev_module = {
64 	.name	= "ftl_lib",
65 };
66 
67 struct ftl_dev_init_ctx {
68 	/* Owner */
69 	struct spdk_ftl_dev		*dev;
70 	/* Initial arguments */
71 	struct spdk_ftl_dev_init_opts	opts;
72 	/* IO channel for retrieving zone info */
73 	struct spdk_io_channel		*ioch;
74 	/* Buffer for reading zone info */
75 	struct spdk_bdev_zone_info	info[FTL_ZONE_INFO_COUNT];
76 	/* Currently read zone */
77 	size_t				zone_id;
78 	/* User's callback */
79 	spdk_ftl_init_fn		cb_fn;
80 	/* Callback's argument */
81 	void				*cb_arg;
82 	/* Thread to call the callback on */
83 	struct spdk_thread		*thread;
84 	/* Poller to check if the device has been destroyed/initialized */
85 	struct spdk_poller		*poller;
86 	/* Status to return for halt completion callback */
87 	int				halt_complete_status;
88 };
89 
90 static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
91 static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
92 static const struct spdk_ftl_conf	g_default_conf = {
93 	.limits = {
94 		/* 5 free bands  / 0 % host writes */
95 		[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
96 		/* 10 free bands / 5 % host writes */
97 		[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
98 		/* 20 free bands / 40 % host writes */
99 		[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
100 		/* 40 free bands / 100 % host writes - defrag starts running */
101 		[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
102 	},
103 	/* 10 percent valid blocks */
104 	.invalid_thld = 10,
105 	/* 20% spare blocks */
106 	.lba_rsvd = 20,
107 	/* 6 MiB write buffer per IO channel */
108 	.write_buffer_size = 6 * 1024 * 1024,
109 	/* 90% band fill threshold */
110 	.band_thld = 90,
111 	/* Max 32 IO depth per band relocate */
112 	.max_reloc_qdepth = 32,
113 	/* Max 3 active band relocates */
114 	.max_active_relocs = 3,
115 	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
116 	.user_io_pool_size = 2048,
117 	/*
118 	 * If clear, FTL will return an error when restoring after a dirty shutdown.
119 	 * If set, the last band will be padded and FTL will restore based only on
120 	 * closed bands - this will result in lost data after recovery.
121 	 */
122 	.allow_open_bands = false,
123 	.max_io_channels = 128,
124 	.nv_cache = {
125 		/* Maximum number of concurrent requests */
126 		.max_request_cnt = 2048,
127 		/* Maximum number of blocks per request */
128 		.max_request_size = 16,
129 	}
130 };
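
/*
 * Editorial note: each limits[] entry above pairs a free-band threshold
 * (thld) with the percentage of user writes still admitted (limit) once the
 * free-band count drops to that threshold. With the defaults, defrag starts
 * at 40 free bands while user writes are unthrottled, and at 5 free bands
 * user writes are stopped entirely until relocation frees new bands.
 */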
131 
132 static int
133 ftl_band_init_md(struct ftl_band *band)
134 {
135 	struct ftl_lba_map *lba_map = &band->lba_map;
136 	int rc;
137 
138 	lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
139 	if (!lba_map->vld) {
140 		return -ENOMEM;
141 	}
142 
143 	rc = pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
144 	if (rc) {
145 		spdk_bit_array_free(&lba_map->vld);
146 		return rc;
147 	}
148 	ftl_band_md_clear(band);
149 	return 0;
150 }
151 
152 static int
153 ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
154 {
155 	size_t i;
156 
157 	if (conf->invalid_thld >= 100) {
158 		return -1;
159 	}
160 	if (conf->lba_rsvd >= 100) {
161 		return -1;
162 	}
163 	if (conf->lba_rsvd == 0) {
164 		return -1;
165 	}
166 	if (conf->write_buffer_size == 0) {
167 		return -1;
168 	}
169 	if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
170 		return -1;
171 	}
172 
173 	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
174 		if (conf->limits[i].limit > 100) {
175 			return -1;
176 		}
177 	}
178 
179 	return 0;
180 }
181 
182 static int
183 ftl_dev_init_bands(struct spdk_ftl_dev *dev)
184 {
185 	struct ftl_band *band, *pband;
186 	unsigned int i;
187 	int rc = 0;
188 
189 	LIST_INIT(&dev->free_bands);
190 	LIST_INIT(&dev->shut_bands);
191 
192 	dev->num_free = 0;
193 	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
194 	if (!dev->bands) {
195 		return -1;
196 	}
197 
198 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
199 		band = &dev->bands[i];
200 		band->id = i;
201 		band->dev = dev;
202 		band->state = FTL_BAND_STATE_CLOSED;
203 
204 		if (LIST_EMPTY(&dev->shut_bands)) {
205 			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
206 		} else {
207 			LIST_INSERT_AFTER(pband, band, list_entry);
208 		}
209 		pband = band;
210 
211 		CIRCLEQ_INIT(&band->zones);
212 		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
213 		if (!band->zone_buf) {
214 			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
215 			rc = -1;
216 			break;
217 		}
218 
219 		rc = ftl_band_init_md(band);
220 		if (rc) {
221 			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
222 			break;
223 		}
224 
225 		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
226 		if (!band->reloc_bitmap) {
227 			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
228 			break;
229 		}
230 	}
231 
232 	return rc;
233 }
234 
235 static void
236 ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
237 {
238 	struct spdk_ftl_dev *dev = event_ctx;
239 
240 	switch (type) {
241 	case SPDK_BDEV_EVENT_REMOVE:
242 		assert(0);
243 		break;
244 	case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
245 		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
246 		ftl_get_media_events(dev);
		break;
247 	default:
248 		break;
249 	}
250 }
251 
252 static int
253 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
254 {
255 	struct spdk_bdev *bdev;
256 	struct spdk_ftl_conf *conf = &dev->conf;
257 	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
258 	char pool_name[128];
259 	int rc;
260 
261 	if (!bdev_name) {
262 		return 0;
263 	}
264 
265 	bdev = spdk_bdev_get_by_name(bdev_name);
266 	if (!bdev) {
267 		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
268 		return -1;
269 	}
270 
271 	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
272 			       dev, &nv_cache->bdev_desc)) {
273 		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
274 		return -1;
275 	}
276 
277 	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
278 		spdk_bdev_close(nv_cache->bdev_desc);
279 		nv_cache->bdev_desc = NULL;
280 		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
281 		return -1;
282 	}
283 
284 	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
285 		     spdk_bdev_get_name(bdev));
286 
287 	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
288 		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
289 		return -1;
290 	}
291 
292 	if (!spdk_bdev_is_md_separate(bdev)) {
293 		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
294 			    spdk_bdev_get_name(bdev));
295 		return -1;
296 	}
297 
298 	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
299 		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
300 			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
301 		return -1;
302 	}
303 
304 	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
305 		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
306 			    spdk_bdev_get_name(bdev));
307 		return -1;
308 	}
309 
310 	/* The cache needs to be capable of storing at least two full bands. This requirement comes
311 	 * from the fact that cache works as a protection against power loss, so before the data
312 	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
313 	 * extra block is needed to store the header.
314 	 */
315 	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
316 		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
317 			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
318 			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
319 		return -1;
320 	}
321 
322 	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
323 	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
324 		return -1;
325 	}
326 
327 	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
328 						spdk_bdev_get_md_size(bdev) *
329 						conf->nv_cache.max_request_size,
330 						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
331 						SPDK_ENV_SOCKET_ID_ANY);
332 	if (!nv_cache->md_pool) {
333 		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
334 		return -1;
335 	}
336 
337 	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
338 	if (!nv_cache->dma_buf) {
339 		SPDK_ERRLOG("Memory allocation failure\n");
340 		return -1;
341 	}
342 
343 	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
344 		SPDK_ERRLOG("Failed to initialize cache lock\n");
345 		return -1;
346 	}
347 
348 	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
349 	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
350 	nv_cache->num_available = nv_cache->num_data_blocks;
351 	nv_cache->ready = false;
352 
353 	return 0;
354 }
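
/*
 * Resulting cache layout (as configured above): block 0 of the cache bdev
 * holds the metadata header, so data occupies the blocks starting at
 * FTL_NV_CACHE_DATA_OFFSET and num_data_blocks is the bdev's block count
 * minus one.
 */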
355 
356 void
357 spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
358 {
359 	*conf = g_default_conf;
360 }
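
/*
 * Usage sketch (illustrative only): callers typically start from the
 * defaults and override selected fields before initializing a device, e.g.:
 *
 *	struct spdk_ftl_conf conf;
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	conf.lba_rsvd = 25;                        // 25% spare blocks
 *	conf.write_buffer_size = 8 * 1024 * 1024;  // multiple of FTL_BLOCK_SIZE
 *
 * Any such overrides still have to satisfy ftl_check_conf() above.
 */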
361 
362 static void
363 ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
364 {
365 	struct ftl_lba_map_request *request = obj;
366 	struct spdk_ftl_dev *dev = opaque;
367 
368 	request->segments = spdk_bit_array_create(spdk_divide_round_up(
369 				    ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
370 }
371 
372 static int
373 ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
374 {
375 	char pool_name[128];
376 	int rc;
377 
378 	rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
379 	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
380 		SPDK_ERRLOG("Failed to create media pool name\n");
381 		return -1;
382 	}
383 
384 	dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
385 				 sizeof(struct ftl_media_event),
386 				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
387 				 SPDK_ENV_SOCKET_ID_ANY);
388 	if (!dev->media_events_pool) {
389 		SPDK_ERRLOG("Failed to create media events pool\n");
390 		return -1;
391 	}
392 
393 	return 0;
394 }
395 
396 static int
397 ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
398 {
399 #define POOL_NAME_LEN 128
400 	char pool_name[POOL_NAME_LEN];
401 	int rc;
402 
403 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
404 	if (rc < 0 || rc >= POOL_NAME_LEN) {
405 		return -ENAMETOOLONG;
406 	}
407 
408 	/* We need to reserve at least 2 buffers for band close / open sequence
409 	 * alone, plus additional (8) buffers for handling write errors.
410 	 * TODO: This memory pool is utilized only by the core thread - it introduces
411 	 * unnecessary overhead and should be replaced by a different data structure.
412 	 */
413 	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
414 					    ftl_lba_map_pool_elem_size(dev),
415 					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
416 					    SPDK_ENV_SOCKET_ID_ANY);
417 	if (!dev->lba_pool) {
418 		return -ENOMEM;
419 	}
420 
421 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
422 	if (rc < 0 || rc >= POOL_NAME_LEN) {
423 		return -ENAMETOOLONG;
424 	}
425 
426 	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
427 				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
428 				sizeof(struct ftl_lba_map_request),
429 				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
430 				SPDK_ENV_SOCKET_ID_ANY,
431 				ftl_lba_map_request_ctor,
432 				dev);
433 	if (!dev->lba_request_pool) {
434 		return -ENOMEM;
435 	}
436 
437 	return 0;
438 }
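
/*
 * Pool sizing note: with the default configuration the request pool holds
 * max_reloc_qdepth * max_active_relocs = 32 * 3 = 96 ftl_lba_map_request
 * objects, each carrying a segment bit array sized by
 * ftl_lba_map_request_ctor() above.
 */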
439 
440 static void
441 ftl_init_wptr_list(struct spdk_ftl_dev *dev)
442 {
443 	LIST_INIT(&dev->wptr_list);
444 	LIST_INIT(&dev->flush_list);
445 	LIST_INIT(&dev->band_flush_list);
446 }
447 
448 static size_t
449 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
450 {
451 	struct ftl_band *band;
452 	size_t seq = 0;
453 
454 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
455 		if (band->seq > seq) {
456 			seq = band->seq;
457 		}
458 	}
459 
460 	return seq;
461 }
462 
463 static void
464 _ftl_init_bands_state(void *ctx)
465 {
466 	struct ftl_band *band, *temp_band;
467 	struct spdk_ftl_dev *dev = ctx;
468 
469 	dev->seq = ftl_dev_band_max_seq(dev);
470 
471 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
472 		if (!band->lba_map.num_vld) {
473 			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
474 		}
475 	}
476 
477 	ftl_reloc_resume(dev->reloc);
478 	/* Clear the limit applications as they're incremented incorrectly by */
479 	/* the initialization code */
480 	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
481 }
482 
483 static int
484 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
485 {
486 	struct ftl_band *band;
487 	int cnt = 0;
488 
489 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
490 		if (band->num_zones && !band->lba_map.num_vld) {
491 			cnt++;
492 		}
493 	}
494 	return cnt;
495 }
496 
497 static int
498 ftl_init_bands_state(struct spdk_ftl_dev *dev)
499 {
500 	/* TODO: Should we abort initialization or expose a read-only device */
501 	/* if there are no free bands? */
502 	/* If we abort initialization, should we depend on the condition that */
503 	/* we have no free bands, or should we require some minimal number of */
504 	/* free bands? */
505 	if (!ftl_init_num_free_bands(dev)) {
506 		return -1;
507 	}
508 
509 	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
510 	return 0;
511 }
512 
513 static void
514 _ftl_dev_init_core_thread(void *ctx)
515 {
516 	struct spdk_ftl_dev *dev = ctx;
517 
518 	dev->core_poller = SPDK_POLLER_REGISTER(ftl_task_core, dev, 0);
519 	if (!dev->core_poller) {
520 		SPDK_ERRLOG("Unable to register core poller\n");
521 		assert(0);
522 	}
523 
524 	dev->ioch = spdk_get_io_channel(dev);
525 }
526 
527 static int
528 ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
529 {
530 	if (!opts->core_thread) {
531 		return -1;
532 	}
533 
534 	dev->core_thread = opts->core_thread;
535 
536 	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
537 	return 0;
538 }
539 
540 static int
541 ftl_dev_l2p_alloc_pmem(struct spdk_ftl_dev *dev, size_t l2p_size, const char *l2p_path)
542 {
543 #ifdef SPDK_CONFIG_PMDK
544 	int is_pmem;
545 
546 	if ((dev->l2p = pmem_map_file(l2p_path, 0,
547 				      0, 0, &dev->l2p_pmem_len, &is_pmem)) == NULL) {
548 		SPDK_ERRLOG("Failed to mmap l2p_path\n");
549 		return -1;
550 	}
551 
552 	if (!is_pmem) {
553 		SPDK_NOTICELOG("l2p_path mapped on non-pmem device\n");
554 	}
555 
556 	if (dev->l2p_pmem_len < l2p_size) {
557 		SPDK_ERRLOG("l2p_path file is too small\n");
558 		return -1;
559 	}
560 
561 	pmem_memset_persist(dev->l2p, FTL_ADDR_INVALID, l2p_size);
562 
563 	return 0;
564 #else /* SPDK_CONFIG_PMDK */
565 	SPDK_ERRLOG("Libpmem not available, cannot use pmem l2p_path\n");
566 	return -1;
567 #endif /* SPDK_CONFIG_PMDK */
568 }
569 
570 static int
571 ftl_dev_l2p_alloc_dram(struct spdk_ftl_dev *dev, size_t l2p_size)
572 {
573 	dev->l2p = malloc(l2p_size);
574 	if (!dev->l2p) {
575 		SPDK_ERRLOG("Failed to allocate l2p table\n");
576 		return -1;
577 	}
578 
579 	memset(dev->l2p, FTL_ADDR_INVALID, l2p_size);
580 
581 	return 0;
582 }
583 
584 static int
585 ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
586 {
587 	size_t addr_size = dev->addr_len >= 32 ? 8 : 4;
588 	size_t l2p_size = dev->num_lbas * addr_size;
589 	const char *l2p_path = dev->conf.l2p_path;
590 
591 	if (dev->num_lbas == 0) {
592 		SPDK_ERRLOG("Invalid l2p table size\n");
593 		return -1;
594 	}
595 
596 	if (dev->l2p) {
597 		SPDK_ERRLOG("L2p table already allocated\n");
598 		return -1;
599 	}
600 
601 	dev->l2p_pmem_len = 0;
602 	if (l2p_path) {
603 		return ftl_dev_l2p_alloc_pmem(dev, l2p_size, l2p_path);
604 	} else {
605 		return ftl_dev_l2p_alloc_dram(dev, l2p_size);
606 	}
607 }
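
/*
 * Sizing example (illustrative): for a base bdev with 2^30 blocks,
 * addr_len is spdk_u64log2(2^30) + 1 = 31, so 4-byte L2P entries suffice.
 * With the default 20% lba_rsvd, num_lbas is about 0.8 * 2^30, giving an
 * L2P of roughly 3.2 GiB, allocated either in DRAM or on the pmem file
 * when l2p_path is set.
 */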
608 
609 static void
610 ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
611 {
612 	if (!init_ctx) {
613 		return;
614 	}
615 
616 	if (init_ctx->ioch) {
617 		spdk_put_io_channel(init_ctx->ioch);
618 	}
619 
620 	free(init_ctx);
621 }
622 
623 static void
624 ftl_call_init_complete_cb(void *ctx)
625 {
626 	struct ftl_dev_init_ctx *init_ctx = ctx;
627 	struct spdk_ftl_dev *dev = init_ctx->dev;
628 
629 	if (init_ctx->cb_fn != NULL) {
630 		init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
631 	}
632 
633 	ftl_dev_free_init_ctx(init_ctx);
634 }
635 
636 static void
637 ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
638 {
639 	struct spdk_ftl_dev *dev = init_ctx->dev;
640 
641 	pthread_mutex_lock(&g_ftl_queue_lock);
642 	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
643 	pthread_mutex_unlock(&g_ftl_queue_lock);
644 
645 	dev->initialized = 1;
646 
647 	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
648 }
649 
650 static void
651 ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
652 {
653 	struct ftl_dev_init_ctx *init_ctx = ctx;
654 
655 	if (init_ctx->cb_fn != NULL) {
656 		init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
657 	}
658 
659 	ftl_dev_free_init_ctx(init_ctx);
660 }
661 
662 static int ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
663 			struct spdk_thread *thread);
664 
665 static void
666 ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
667 {
668 	if (ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
669 		SPDK_ERRLOG("Unable to free the device\n");
670 		assert(0);
671 	}
672 }
673 
674 static void
675 ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
676 {
677 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
678 	struct spdk_ftl_dev *dev = init_ctx->dev;
679 
680 	spdk_bdev_free_io(bdev_io);
681 	if (spdk_unlikely(!success)) {
682 		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
683 		ftl_init_fail(init_ctx);
684 		return;
685 	}
686 
687 	dev->nv_cache.ready = true;
688 	ftl_init_complete(init_ctx);
689 }
690 
691 static void
692 ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
693 {
694 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
695 	struct spdk_ftl_dev *dev = init_ctx->dev;
696 	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
697 
698 	spdk_bdev_free_io(bdev_io);
699 	if (spdk_unlikely(!success)) {
700 		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
701 		ftl_init_fail(init_ctx);
702 		return;
703 	}
704 
705 	nv_cache->phase = 1;
706 	if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
707 		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
708 		ftl_init_fail(init_ctx);
709 	}
710 }
711 
712 static void
713 _ftl_nv_cache_scrub(void *ctx)
714 {
715 	struct ftl_dev_init_ctx *init_ctx = ctx;
716 	struct spdk_ftl_dev *dev = init_ctx->dev;
717 	int rc;
718 
719 	rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);
720 
721 	if (spdk_unlikely(rc != 0)) {
722 		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
723 			    spdk_strerror(-rc));
724 		ftl_init_fail(init_ctx);
725 	}
726 }
727 
728 static int
729 ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
730 {
731 	struct spdk_ftl_dev *dev = init_ctx->dev;
732 	struct spdk_ftl_conf *conf = &dev->conf;
733 	size_t i;
734 
735 	spdk_uuid_generate(&dev->uuid);
736 
737 	dev->num_lbas = 0;
738 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
739 		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
740 	}
741 
742 	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
743 
744 	if (ftl_dev_l2p_alloc(dev)) {
745 		SPDK_ERRLOG("Unable to init l2p table\n");
746 		return -1;
747 	}
748 
749 	if (ftl_init_bands_state(dev)) {
750 		SPDK_ERRLOG("Unable to finish the initialization\n");
751 		return -1;
752 	}
753 
754 	if (!ftl_dev_has_nv_cache(dev)) {
755 		ftl_init_complete(init_ctx);
756 	} else {
757 		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
758 	}
759 
760 	return 0;
761 }
762 
763 static void
764 ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
765 {
766 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
767 
768 	if (spdk_unlikely(status != 0)) {
769 		SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
770 		ftl_init_fail(init_ctx);
771 		return;
772 	}
773 
774 	ftl_init_complete(init_ctx);
775 }
776 
777 static void
778 ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
779 {
780 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
781 	struct spdk_ftl_dev *dev = init_ctx->dev;
782 
783 	if (status) {
784 		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
785 		ftl_init_fail(init_ctx);
786 		return;
787 	}
788 
789 	if (ftl_init_bands_state(dev)) {
790 		SPDK_ERRLOG("Unable to finish the initialization\n");
791 		ftl_init_fail(init_ctx);
792 		return;
793 	}
794 
795 	if (!ftl_dev_has_nv_cache(dev)) {
796 		ftl_init_complete(init_ctx);
797 		return;
798 	}
799 
800 	ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
801 }
802 
803 static void
804 ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
805 {
806 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
807 
808 	if (status) {
809 		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
810 		goto error;
811 	}
812 
813 	/* After the metadata is read it should be possible to allocate the L2P */
814 	if (ftl_dev_l2p_alloc(init_ctx->dev)) {
815 		SPDK_ERRLOG("Failed to allocate the L2P\n");
816 		goto error;
817 	}
818 
819 	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
820 		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
821 		goto error;
822 	}
823 
824 	return;
825 error:
826 	ftl_init_fail(init_ctx);
827 }
828 
829 static int
830 ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
831 {
832 	struct spdk_ftl_dev *dev = init_ctx->dev;
833 
834 	dev->uuid = init_ctx->opts.uuid;
835 
836 	if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
837 		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
838 		return -1;
839 	}
840 
841 	return 0;
842 }
843 
844 static void
845 ftl_dev_update_bands(struct spdk_ftl_dev *dev)
846 {
847 	struct ftl_band *band, *temp_band;
848 	size_t i;
849 
850 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
851 		band = &dev->bands[i];
852 		band->tail_md_addr = ftl_band_tail_md_addr(band);
853 	}
854 
855 	/* Remove a band from the shut_bands list to prevent further */
856 	/* processing if all blocks on that band are bad */
857 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
858 		if (!band->num_zones) {
859 			dev->num_bands--;
860 			LIST_REMOVE(band, list_entry);
861 		}
862 	}
863 }
864 
865 static void
866 ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
867 {
868 	struct spdk_ftl_dev *dev = init_ctx->dev;
869 
870 	ftl_dev_update_bands(dev);
871 
872 	if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
873 		SPDK_ERRLOG("Unable to initialize device thread\n");
874 		ftl_init_fail(init_ctx);
875 		return;
876 	}
877 
878 	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
879 		if (ftl_setup_initial_state(init_ctx)) {
880 			SPDK_ERRLOG("Failed to setup initial state of the device\n");
881 			ftl_init_fail(init_ctx);
882 			return;
883 		}
884 	} else {
885 		if (ftl_restore_state(init_ctx)) {
886 			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
887 			ftl_init_fail(init_ctx);
888 			return;
889 		}
890 	}
891 }
892 
893 static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);
894 
895 static void
896 ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
897 {
898 	struct ftl_dev_init_ctx *init_ctx = cb_arg;
899 	struct spdk_ftl_dev *dev = init_ctx->dev;
900 	struct ftl_band *band;
901 	struct ftl_zone *zone;
902 	struct ftl_addr addr;
903 	size_t i, zones_left, num_zones;
904 
905 	spdk_bdev_free_io(bdev_io);
906 
907 	if (spdk_unlikely(!success)) {
908 		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
909 		ftl_init_fail(init_ctx);
910 		return;
911 	}
912 
913 	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
914 	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
915 
916 	for (i = 0; i < num_zones; ++i) {
917 		addr.offset = init_ctx->info[i].zone_id;
918 		band = &dev->bands[ftl_addr_get_band(dev, addr)];
919 		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
920 		zone->info = init_ctx->info[i];
921 
922 		/* TODO: add support for zone capacity less than zone size */
923 		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
924 			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
925 			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
926 				    "zone id: %"PRIu64"\n", init_ctx->zone_id);
927 		}
928 
929 		/* Set the write pointer to the last block plus one for zones in the full state */
930 		if (zone->info.state == SPDK_BDEV_ZONE_STATE_FULL) {
931 			zone->info.write_pointer = zone->info.zone_id + zone->info.capacity;
932 		}
933 
934 		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
935 			band->num_zones++;
936 			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
937 		}
938 	}
939 
940 	init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);
941 
942 	ftl_dev_get_zone_info(init_ctx);
943 }
944 
945 static void
946 ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
947 {
948 	struct spdk_ftl_dev *dev = init_ctx->dev;
949 	size_t zones_left, num_zones;
950 	int rc;
951 
952 	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
953 	if (zones_left == 0) {
954 		ftl_dev_init_state(init_ctx);
955 		return;
956 	}
957 
958 	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
959 
960 	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
961 				     init_ctx->zone_id, num_zones, init_ctx->info,
962 				     ftl_dev_get_zone_info_cb, init_ctx);
963 
964 	if (spdk_unlikely(rc != 0)) {
965 		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
966 		ftl_init_fail(init_ctx);
967 	}
968 }
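
/*
 * Note: zone info is fetched in batches of up to FTL_ZONE_INFO_COUNT zones.
 * The completion callback above advances init_ctx->zone_id by one zone size
 * per reported zone and re-enters ftl_dev_get_zone_info() until every zone
 * has been consumed, at which point ftl_dev_init_state() takes over.
 */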
969 
970 static int
971 ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
972 {
973 	struct spdk_ftl_dev *dev = init_ctx->dev;
974 
975 	init_ctx->zone_id = 0;
976 	init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
977 	if (!init_ctx->ioch) {
978 		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
979 		return -1;
980 	}
981 
982 	ftl_dev_get_zone_info(init_ctx);
983 
984 	return 0;
985 }
986 
987 struct _ftl_io_channel {
988 	struct ftl_io_channel *ioch;
989 };
990 
991 struct ftl_io_channel *
992 ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
993 {
994 	struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);
995 
996 	return _ioch->ioch;
997 }
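
/*
 * Note on the indirection: spdk_io_channel_get_ctx() returns the context
 * embedded in the generic SPDK channel (struct _ftl_io_channel), which only
 * stores a pointer to the real ftl_io_channel. The ftl_io_channel is
 * allocated separately so that it can be torn down asynchronously on the
 * core thread (see _ftl_io_channel_destroy_cb()) after SPDK reclaims the
 * generic channel context.
 */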
998 
999 static void
1000 ftl_io_channel_register(void *ctx)
1001 {
1002 	struct ftl_io_channel *ioch = ctx;
1003 	struct spdk_ftl_dev *dev = ioch->dev;
1004 	uint32_t ioch_index;
1005 
1006 	for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
1007 		if (dev->ioch_array[ioch_index] == NULL) {
1008 			dev->ioch_array[ioch_index] = ioch;
1009 			ioch->index = ioch_index;
1010 			break;
1011 		}
1012 	}
1013 
1014 	assert(ioch_index < dev->conf.max_io_channels);
1015 	TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
1016 }
1017 
1018 static int
1019 ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
1020 {
1021 	struct spdk_ftl_dev *dev = ioch->dev;
1022 	struct ftl_wbuf_entry *entry;
1023 	uint32_t i;
1024 	int rc;
1025 
1026 	ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
1027 	ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
1028 	if (ioch->wbuf_entries == NULL) {
1029 		SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
1030 		return -1;
1031 	}
1032 
1033 	ioch->qdepth_limit = ioch->num_entries;
1034 	ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
1035 					  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
1036 	if (ioch->wbuf_payload == NULL) {
1037 		SPDK_ERRLOG("Failed to allocate write buffer payload\n");
1038 		goto error_entries;
1039 	}
1040 
1041 	ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
1042 					    spdk_align32pow2(ioch->num_entries + 1),
1043 					    SPDK_ENV_SOCKET_ID_ANY);
1044 	if (ioch->free_queue == NULL) {
1045 		SPDK_ERRLOG("Failed to allocate free queue\n");
1046 		goto error_payload;
1047 	}
1048 
1049 	ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
1050 					      spdk_align32pow2(ioch->num_entries + 1),
1051 					      SPDK_ENV_SOCKET_ID_ANY);
1052 	if (ioch->submit_queue == NULL) {
1053 		SPDK_ERRLOG("Failed to allocate submit queue\n");
1054 		goto error_free_queue;
1055 	}
1056 
1057 	for (i = 0; i < ioch->num_entries; ++i) {
1058 		entry = &ioch->wbuf_entries[i];
1059 		entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
1060 		entry->ioch = ioch;
1061 		entry->index = i;
1062 		entry->addr.offset = FTL_ADDR_INVALID;
1063 
1064 		rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
1065 		if (rc != 0) {
1066 			SPDK_ERRLOG("Failed to initialize spinlock\n");
1067 			goto error_spinlock;
1068 		}
1069 
1070 		spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
1071 	}
1072 
1073 	return 0;
1074 error_spinlock:
1075 	for (; i > 0; --i) {
1076 		pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
1077 	}
1078 
1079 	spdk_ring_free(ioch->submit_queue);
1080 error_free_queue:
1081 	spdk_ring_free(ioch->free_queue);
1082 error_payload:
1083 	spdk_free(ioch->wbuf_payload);
1084 error_entries:
1085 	free(ioch->wbuf_entries);
1086 
1087 	return -1;
1088 }
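
/*
 * Sizing note (illustrative, assuming FTL_BLOCK_SIZE is 4 KiB): the default
 * 6 MiB write_buffer_size yields 1536 write buffer entries per channel, and
 * both rings above are created with spdk_align32pow2(1537) = 2048 slots.
 */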
1089 
1090 static int
1091 ftl_io_channel_create_cb(void *io_device, void *ctx)
1092 {
1093 	struct spdk_ftl_dev *dev = io_device;
1094 	struct _ftl_io_channel *_ioch = ctx;
1095 	struct ftl_io_channel *ioch;
1096 	uint32_t num_io_channels;
1097 	char mempool_name[32];
1098 	int rc;
1099 
1100 	num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1101 	if (num_io_channels >= dev->conf.max_io_channels) {
1102 		SPDK_ERRLOG("Reached maximum number of IO channels\n");
1103 		__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1104 		return -1;
1105 	}
1106 
1107 	ioch = calloc(1, sizeof(*ioch));
1108 	if (ioch == NULL) {
1109 		SPDK_ERRLOG("Failed to allocate IO channel\n");
1110 		return -1;
1111 	}
1112 
1113 	rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
1114 	if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
1115 		SPDK_ERRLOG("Failed to create IO channel pool name\n");
1116 		free(ioch);
1117 		return -1;
1118 	}
1119 
1120 	ioch->cache_ioch = NULL;
1121 	ioch->index = FTL_IO_CHANNEL_INDEX_INVALID;
1122 	ioch->dev = dev;
1123 	ioch->elem_size = sizeof(struct ftl_md_io);
1124 	ioch->io_pool = spdk_mempool_create(mempool_name,
1125 					    dev->conf.user_io_pool_size,
1126 					    ioch->elem_size,
1127 					    0,
1128 					    SPDK_ENV_SOCKET_ID_ANY);
1129 	if (!ioch->io_pool) {
1130 		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
1131 		free(ioch);
1132 		return -1;
1133 	}
1134 
1135 	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
1136 	if (!ioch->base_ioch) {
1137 		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
1138 		goto fail_ioch;
1139 	}
1140 
1141 	if (ftl_dev_has_nv_cache(dev)) {
1142 		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
1143 		if (!ioch->cache_ioch) {
1144 			SPDK_ERRLOG("Failed to create cache IO channel\n");
1145 			goto fail_cache;
1146 		}
1147 	}
1148 
1149 	TAILQ_INIT(&ioch->write_cmpl_queue);
1150 	TAILQ_INIT(&ioch->retry_queue);
1151 	ioch->poller = SPDK_POLLER_REGISTER(ftl_io_channel_poll, ioch, 0);
1152 	if (!ioch->poller) {
1153 		SPDK_ERRLOG("Failed to register IO channel poller\n");
1154 		goto fail_poller;
1155 	}
1156 
1157 	if (ftl_io_channel_init_wbuf(ioch)) {
1158 		SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
1159 		goto fail_wbuf;
1160 	}
1161 
1162 	_ioch->ioch = ioch;
1163 
1164 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);
1165 
1166 	return 0;
1167 fail_wbuf:
1168 	spdk_poller_unregister(&ioch->poller);
1169 fail_poller:
1170 	if (ioch->cache_ioch) {
1171 		spdk_put_io_channel(ioch->cache_ioch);
1172 	}
1173 fail_cache:
1174 	spdk_put_io_channel(ioch->base_ioch);
1175 fail_ioch:
1176 	spdk_mempool_free(ioch->io_pool);
1177 	free(ioch);
1178 
1179 	return -1;
1180 }
1181 
1182 static void
1183 ftl_io_channel_unregister(void *ctx)
1184 {
1185 	struct ftl_io_channel *ioch = ctx;
1186 	struct spdk_ftl_dev *dev = ioch->dev;
1187 	uint32_t i, num_io_channels __attribute__((unused));
1188 
1189 	assert(ioch->index < dev->conf.max_io_channels);
1190 	assert(dev->ioch_array[ioch->index] == ioch);
1191 
1192 	dev->ioch_array[ioch->index] = NULL;
1193 	TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);
1194 
1195 	num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
1196 	assert(num_io_channels > 0);
1197 
1198 	for (i = 0; i < ioch->num_entries; ++i) {
1199 		pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
1200 	}
1201 
1202 	spdk_mempool_free(ioch->io_pool);
1203 	spdk_ring_free(ioch->free_queue);
1204 	spdk_ring_free(ioch->submit_queue);
1205 	spdk_free(ioch->wbuf_payload);
1206 	free(ioch->wbuf_entries);
1207 	free(ioch);
1208 }
1209 
1210 static void
1211 _ftl_io_channel_destroy_cb(void *ctx)
1212 {
1213 	struct ftl_io_channel *ioch = ctx;
1214 	struct spdk_ftl_dev *dev = ioch->dev;
1215 	uint32_t i;
1216 
1217 	/* Do not destroy the channel if some of its entries are still in use */
1218 	if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
1219 		spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
1220 		return;
1221 	}
1222 
1223 	/* Evict all valid entries from cache */
1224 	for (i = 0; i < ioch->num_entries; ++i) {
1225 		ftl_evict_cache_entry(dev, &ioch->wbuf_entries[i]);
1226 	}
1227 
1228 	spdk_poller_unregister(&ioch->poller);
1229 
1230 	spdk_put_io_channel(ioch->base_ioch);
1231 	if (ioch->cache_ioch) {
1232 		spdk_put_io_channel(ioch->cache_ioch);
1233 	}
1234 
1235 	ioch->base_ioch = NULL;
1236 	ioch->cache_ioch = NULL;
1237 
1238 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
1239 }
1240 
1241 static void
1242 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
1243 {
1244 	struct _ftl_io_channel *_ioch = ctx;
1245 	struct ftl_io_channel *ioch = _ioch->ioch;
1246 
1247 	/* Mark the IO channel as being flushed to force out any unwritten entries */
1248 	ioch->flush = true;
1249 
1250 	_ftl_io_channel_destroy_cb(ioch);
1251 }
1252 
1253 static int
1254 ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
1255 {
1256 	struct ftl_batch *batch;
1257 	uint32_t i;
1258 
1259 	/* Align the IO channels to nearest power of 2 to allow for easy addr bit shift */
1260 	dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
1261 	dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);
1262 
1263 	dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
1264 	if (!dev->ioch_array) {
1265 		SPDK_ERRLOG("Failed to allocate IO channel array\n");
1266 		return -1;
1267 	}
1268 
1269 	if (dev->md_size > 0) {
1270 		dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
1271 					   dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
1272 					   SPDK_MALLOC_DMA);
1273 		if (dev->md_buf == NULL) {
1274 			SPDK_ERRLOG("Failed to allocate metadata buffer\n");
1275 			return -1;
1276 		}
1277 	}
1278 
1279 	dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
1280 	if (!dev->iov_buf) {
1281 		SPDK_ERRLOG("Failed to allocate iovec buffer\n");
1282 		return -1;
1283 	}
1284 
1285 	TAILQ_INIT(&dev->free_batches);
1286 	TAILQ_INIT(&dev->pending_batches);
1287 	TAILQ_INIT(&dev->ioch_queue);
1288 
1289 	for (i = 0; i < FTL_BATCH_COUNT; ++i) {
1290 		batch = &dev->batch_array[i];
1291 		batch->iov = &dev->iov_buf[i * dev->xfer_size];
1292 		batch->num_entries = 0;
1293 		batch->index = i;
1294 		TAILQ_INIT(&batch->entries);
1295 		if (dev->md_buf != NULL) {
1296 			batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
1297 		}
1298 
1299 		TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
1300 	}
1301 
1302 	dev->num_io_channels = 0;
1303 
1304 	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
1305 				sizeof(struct _ftl_io_channel),
1306 				NULL);
1307 
1308 	return 0;
1309 }
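
/*
 * Layout note: iov_buf is a single allocation of FTL_BATCH_COUNT * xfer_size
 * iovecs, and batch i uses the slice starting at i * xfer_size; when the
 * base bdev carries per-block metadata, md_buf is carved up the same way,
 * xfer_size * md_size bytes per batch.
 */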
1310 
1311 static int
1312 ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
1313 {
1314 	uint32_t block_size;
1315 	uint64_t num_blocks;
1316 	struct spdk_bdev *bdev;
1317 
1318 	bdev = spdk_bdev_get_by_name(bdev_name);
1319 	if (!bdev) {
1320 		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
1321 		return -1;
1322 	}
1323 
1324 	if (!spdk_bdev_is_zoned(bdev)) {
1325 		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
1326 			    spdk_bdev_get_name(bdev));
1327 		return -1;
1328 	}
1329 
1330 	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
1331 			       dev, &dev->base_bdev_desc)) {
1332 		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
1333 		return -1;
1334 	}
1335 
1336 	if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
1337 		spdk_bdev_close(dev->base_bdev_desc);
1338 		dev->base_bdev_desc = NULL;
1339 		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
1340 		return -1;
1341 	}
1342 
1343 	dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
1344 	dev->md_size = spdk_bdev_get_md_size(bdev);
1345 
1346 	block_size = spdk_bdev_get_block_size(bdev);
1347 	if (block_size != FTL_BLOCK_SIZE) {
1348 		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
1349 		return -1;
1350 	}
1351 
1352 	num_blocks = spdk_bdev_get_num_blocks(bdev);
1353 	if (num_blocks % ftl_get_num_punits(dev)) {
1354 		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be a multiple "
1355 			    "of the optimal number of zones.\n");
1356 		return -1;
1357 	}
1358 
1359 	if (ftl_is_append_supported(dev) &&
1360 	    !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
1361 		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
1362 			    spdk_bdev_get_name(bdev));
1363 		return -1;
1364 	}
1365 
1366 	dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
1367 	dev->addr_len = spdk_u64log2(num_blocks) + 1;
1368 
1369 	return 0;
1370 }
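
/*
 * Geometry example (illustrative): a zoned bdev with 2^30 blocks, 128
 * punits and 4096 blocks per zone yields 2^30 / (128 * 4096) = 2048 bands,
 * with addr_len = spdk_u64log2(2^30) + 1 = 31 bits.
 */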
1371 
1372 static void
1373 ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
1374 {
1375 	struct ftl_lba_map_request *request = obj;
1376 
1377 	spdk_bit_array_free(&request->segments);
1378 }
1379 
1380 static void
1381 ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
1382 {
1383 	if (!bdev_desc) {
1384 		return;
1385 	}
1386 
1387 	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
1388 	spdk_bdev_close(bdev_desc);
1389 }
1390 
1391 static void
1392 ftl_dev_free_sync(struct spdk_ftl_dev *dev)
1393 {
1394 	struct spdk_ftl_dev *iter;
1395 	size_t i;
1396 
1397 	if (!dev) {
1398 		return;
1399 	}
1400 
1401 	pthread_mutex_lock(&g_ftl_queue_lock);
1402 	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
1403 		if (iter == dev) {
1404 			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
1405 			break;
1406 		}
1407 	}
1408 	pthread_mutex_unlock(&g_ftl_queue_lock);
1409 
1410 	assert(LIST_EMPTY(&dev->wptr_list));
1411 	assert(dev->current_batch == NULL);
1412 
1413 	ftl_dev_dump_bands(dev);
1414 	ftl_dev_dump_stats(dev);
1415 
1416 	if (dev->bands) {
1417 		for (i = 0; i < ftl_get_num_bands(dev); ++i) {
1418 			free(dev->bands[i].zone_buf);
1419 			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
1420 			spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
1421 		}
1422 	}
1423 
1424 	spdk_dma_free(dev->nv_cache.dma_buf);
1425 
1426 	spdk_mempool_free(dev->lba_pool);
1427 	spdk_mempool_free(dev->nv_cache.md_pool);
1428 	spdk_mempool_free(dev->media_events_pool);
1429 	if (dev->lba_request_pool) {
1430 		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
1431 	}
1432 	spdk_mempool_free(dev->lba_request_pool);
1433 
1434 	ftl_reloc_free(dev->reloc);
1435 
1436 	ftl_release_bdev(dev->nv_cache.bdev_desc);
1437 	ftl_release_bdev(dev->base_bdev_desc);
1438 
1439 	spdk_free(dev->md_buf);
1440 
1441 	assert(dev->num_io_channels == 0);
1442 	free(dev->ioch_array);
1443 	free(dev->iov_buf);
1444 	free(dev->name);
1445 	free(dev->bands);
1446 	if (dev->l2p_pmem_len != 0) {
1447 #ifdef SPDK_CONFIG_PMDK
1448 		pmem_unmap(dev->l2p, dev->l2p_pmem_len);
1449 #endif /* SPDK_CONFIG_PMDK */
1450 	} else {
1451 		free(dev->l2p);
1452 	}
1453 	free((char *)dev->conf.l2p_path);
1454 	free(dev);
1455 }
1456 
1457 int
1458 spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
1459 {
1460 	struct spdk_ftl_dev *dev;
1461 	struct spdk_ftl_dev_init_opts opts = *_opts;
1462 	struct ftl_dev_init_ctx *init_ctx = NULL;
1463 	int rc = -ENOMEM;
1464 
1465 	dev = calloc(1, sizeof(*dev));
1466 	if (!dev) {
1467 		return -ENOMEM;
1468 	}
1469 
1470 	init_ctx = calloc(1, sizeof(*init_ctx));
1471 	if (!init_ctx) {
1472 		goto fail_sync;
1473 	}
1474 
1475 	init_ctx->dev = dev;
1476 	init_ctx->opts = *_opts;
1477 	init_ctx->cb_fn = cb_fn;
1478 	init_ctx->cb_arg = cb_arg;
1479 	init_ctx->thread = spdk_get_thread();
1480 
1481 	if (!opts.conf) {
1482 		opts.conf = &g_default_conf;
1483 	}
1484 
1485 	if (!opts.base_bdev) {
1486 		SPDK_ERRLOG("No underlying device specified in configuration\n");
1487 		rc = -EINVAL;
1488 		goto fail_sync;
1489 	}
1490 
1491 	dev->conf = *opts.conf;
1492 	dev->limit = SPDK_FTL_LIMIT_MAX;
1493 
1494 	dev->name = strdup(opts.name);
1495 	if (!dev->name) {
1496 		SPDK_ERRLOG("Unable to set device name\n");
1497 		goto fail_sync;
1498 	}
1499 
1500 	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
1501 		SPDK_ERRLOG("Unsupported underlying device\n");
1502 		goto fail_sync;
1503 	}
1504 
1505 	if (opts.conf->l2p_path) {
1506 		dev->conf.l2p_path = strdup(opts.conf->l2p_path);
1507 		if (!dev->conf.l2p_path) {
1508 			rc = -ENOMEM;
1509 			goto fail_sync;
1510 		}
1511 	}
1512 
1513 	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
1514 	/* so we don't have to clean up in each of the init functions. */
1515 	if (ftl_check_conf(dev, opts.conf)) {
1516 		SPDK_ERRLOG("Invalid device configuration\n");
1517 		goto fail_sync;
1518 	}
1519 
1520 	if (ftl_init_lba_map_pools(dev)) {
1521 		SPDK_ERRLOG("Unable to init LBA map pools\n");
1522 		goto fail_sync;
1523 	}
1524 
1525 	if (ftl_init_media_events_pool(dev)) {
1526 		SPDK_ERRLOG("Unable to init media events pools\n");
1527 		goto fail_sync;
1528 	}
1529 
1530 	ftl_init_wptr_list(dev);
1531 
1532 	if (ftl_dev_init_bands(dev)) {
1533 		SPDK_ERRLOG("Unable to initialize band array\n");
1534 		goto fail_sync;
1535 	}
1536 
1537 	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
1538 		SPDK_ERRLOG("Unable to initialize persistent cache\n");
1539 		goto fail_sync;
1540 	}
1541 
1542 	dev->reloc = ftl_reloc_init(dev);
1543 	if (!dev->reloc) {
1544 		SPDK_ERRLOG("Unable to initialize reloc structures\n");
1545 		goto fail_sync;
1546 	}
1547 
1548 	if (ftl_dev_init_io_channel(dev)) {
1549 		SPDK_ERRLOG("Unable to initialize IO channels\n");
1550 		goto fail_sync;
1551 	}
1552 
1553 	if (ftl_dev_init_zones(init_ctx)) {
1554 		SPDK_ERRLOG("Failed to initialize zones\n");
1555 		goto fail_async;
1556 	}
1557 
1558 	return 0;
1559 fail_sync:
1560 	ftl_dev_free_sync(dev);
1561 	ftl_dev_free_init_ctx(init_ctx);
1562 	return rc;
1563 fail_async:
1564 	ftl_init_fail(init_ctx);
1565 	return 0;
1566 }
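
/*
 * Usage sketch (illustrative; the bdev name and the callback below are
 * assumptions, not part of this file):
 *
 *	static void
 *	init_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
 *	{
 *		// dev is NULL and status is negative on failure
 *	}
 *
 *	struct spdk_ftl_dev_init_opts opts = {
 *		.base_bdev = "nvme0n1",
 *		.name = "ftl0",
 *		.mode = SPDK_FTL_MODE_CREATE,
 *		.core_thread = spdk_get_thread(),
 *	};
 *
 *	spdk_ftl_dev_init(&opts, init_cb, NULL);
 *
 * The callback is invoked on the calling thread once zone info retrieval
 * and initial state setup (or restore) completes.
 */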
1567 
1568 static void
1569 _ftl_halt_defrag(void *arg)
1570 {
1571 	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1572 }
1573 
1574 static void
1575 ftl_halt_complete_cb(void *ctx)
1576 {
1577 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1578 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1579 
1580 	/* Make sure core IO channel has already been released */
1581 	if (dev->num_io_channels > 0) {
1582 		spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
1583 		return;
1584 	}
1585 
1586 	spdk_io_device_unregister(fini_ctx->dev, NULL);
1587 
1588 	ftl_dev_free_sync(fini_ctx->dev);
1589 	if (fini_ctx->cb_fn != NULL) {
1590 		fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
1591 	}
1592 
1593 	ftl_dev_free_init_ctx(fini_ctx);
1594 }
1595 
1596 static void
1597 ftl_put_io_channel_cb(void *ctx)
1598 {
1599 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1600 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1601 
1602 	spdk_put_io_channel(dev->ioch);
1603 	spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
1604 }
1605 
1606 static void
1607 ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
1608 {
1609 	struct ftl_dev_init_ctx *fini_ctx = cb_arg;
1610 	int rc = 0;
1611 
1612 	spdk_bdev_free_io(bdev_io);
1613 	if (spdk_unlikely(!success)) {
1614 		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
1615 		rc = -EIO;
1616 	}
1617 
1618 	fini_ctx->halt_complete_status = rc;
1619 	spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
1620 }
1621 
1622 static int
1623 ftl_halt_poller(void *ctx)
1624 {
1625 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1626 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1627 
1628 	if (!dev->core_poller) {
1629 		spdk_poller_unregister(&fini_ctx->poller);
1630 
1631 		if (ftl_dev_has_nv_cache(dev)) {
1632 			ftl_nv_cache_write_header(&dev->nv_cache, true,
1633 						  ftl_nv_cache_header_fini_cb, fini_ctx);
1634 		} else {
1635 			fini_ctx->halt_complete_status = 0;
1636 			spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
1637 		}
1638 	}
1639 
1640 	return SPDK_POLLER_BUSY;
1641 }
1642 
1643 static void
1644 ftl_add_halt_poller(void *ctx)
1645 {
1646 	struct ftl_dev_init_ctx *fini_ctx = ctx;
1647 	struct spdk_ftl_dev *dev = fini_ctx->dev;
1648 
1649 	dev->halt = 1;
1650 
1651 	_ftl_halt_defrag(dev);
1652 
1653 	assert(!fini_ctx->poller);
1654 	fini_ctx->poller = SPDK_POLLER_REGISTER(ftl_halt_poller, fini_ctx, 100);
1655 }
1656 
1657 static int
1658 ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
1659 	     struct spdk_thread *thread)
1660 {
1661 	struct ftl_dev_init_ctx *fini_ctx;
1662 
1663 	if (dev->halt_started) {
1664 		return -EBUSY;
1665 	}
1666 	dev->halt_started = true;
1667 
1668 	fini_ctx = calloc(1, sizeof(*fini_ctx));
1669 	if (!fini_ctx) {
1670 		return -ENOMEM;
1671 	}
1672 
1673 	fini_ctx->dev = dev;
1674 	fini_ctx->cb_fn = cb_fn;
1675 	fini_ctx->cb_arg = cb_arg;
1676 	fini_ctx->thread = thread;
1677 
1678 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
1679 	return 0;
1680 }
1681 
1682 int
1683 spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
1684 {
1685 	return ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
1686 }
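
/*
 * Teardown note: spdk_ftl_dev_free() marks the device as halting, stops
 * defrag, and polls until the core poller exits; if a non-volatile cache is
 * attached, a clean-shutdown header is persisted before the device is freed.
 * The callback receives the final status on the thread that requested the
 * free.
 */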
1687 
1688 SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)
1689