xref: /spdk/lib/ftl/ftl_init.c (revision 1914de09202410b6881d8bcff916ce78fb749ad6)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/bdev_zone.h"
#include "spdk/bdev_module.h"

#include "ftl_core.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_band.h"
#include "ftl_debug.h"

#define FTL_CORE_RING_SIZE	4096
#define FTL_INIT_TIMEOUT	30
#define FTL_NSID		1
#define FTL_ZONE_INFO_COUNT	64

/* Dummy bdev module used to claim bdevs. */
static struct spdk_bdev_module g_ftl_bdev_module = {
	.name	= "ftl_lib",
};

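/*
 * Completion callback used for both initialization and teardown. Receives the
 * device (NULL on failure), the caller's context and a status code.
 */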
typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int);

struct ftl_dev_init_ctx {
	/* Owner */
	struct spdk_ftl_dev		*dev;
	/* Initial arguments */
	struct spdk_ftl_dev_init_opts	opts;
	/* IO channel for retrieving zone info */
	struct spdk_io_channel		*ioch;
	/* Buffer for reading zone info */
	struct spdk_bdev_zone_info	info[FTL_ZONE_INFO_COUNT];
	/* Currently read zone */
	size_t				zone_id;
	/* User's callback */
	spdk_ftl_init_fn		cb_fn;
	/* Callback's argument */
	void				*cb_arg;
	/* Thread to call the callback on */
	struct spdk_thread		*thread;
	/* Poller to check if the device has been destroyed/initialized */
	struct spdk_poller		*poller;
	/* Status to return for halt completion callback */
	int				halt_complete_status;
};

static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct spdk_ftl_conf	g_default_conf = {
	.limits = {
		/* 5 free bands / 0% host writes */
		[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
		/* 10 free bands / 5% host writes */
		[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
		/* 20 free bands / 40% host writes */
		[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
		/* 40 free bands / 100% host writes - defrag starts running */
		[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
	},
	/* 10 percent valid blocks */
	.invalid_thld = 10,
	/* 20% spare blocks */
	.lba_rsvd = 20,
	/* 6M write buffer per each IO channel */
	.write_buffer_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/*
	 * If clear, FTL will return an error when restoring after a dirty shutdown.
	 * If set, the last band will be padded and FTL will restore based only on
	 * closed bands - this will result in lost data after recovery.
	 */
	.allow_open_bands = false,
	.max_io_channels = 128,
	.nv_cache = {
		/* Maximum number of concurrent requests */
		.max_request_cnt = 2048,
		/* Maximum number of blocks per request */
		.max_request_size = 16,
	}
};

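/* Allocate the band's valid-block bitmap and clear its metadata. */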
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;

	lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);
	return 0;
}

static int
ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
{
	size_t i;

	if (conf->invalid_thld >= 100) {
		return -1;
	}
	if (conf->lba_rsvd >= 100) {
		return -1;
	}
	if (conf->lba_rsvd == 0) {
		return -1;
	}
	if (conf->write_buffer_size == 0) {
		return -1;
	}
	if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
		return -1;
	}

	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
		if (conf->limits[i].limit > 100) {
			return -1;
		}
	}

	return 0;
}

static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *pband;
	unsigned int i;
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->zones);
		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
		if (!band->zone_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			break;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			break;
		}

		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
		if (!band->reloc_bitmap) {
			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
			rc = -1;
			break;
		}
	}

	return rc;
}

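/*
 * Event callback registered for both the base bdev and the write buffer cache
 * bdev. Hot removal of the underlying bdevs is not supported, hence the assert.
 */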
static void
ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_ftl_dev *dev = event_ctx;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		assert(0);
		break;
	case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
		ftl_get_media_events(dev);
	/* fallthrough */
	default:
		break;
	}
}

static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	struct spdk_bdev *bdev;
	struct spdk_ftl_conf *conf = &dev->conf;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	char pool_name[128];
	int rc;

	if (!bdev_name) {
		return 0;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &nv_cache->bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(nv_cache->bdev_desc);
		nv_cache->bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
		     spdk_bdev_get_name(bdev));

	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
		return -1;
	}

	if (!spdk_bdev_is_md_separate(bdev)) {
		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
		SPDK_ERRLOG("Metadata of bdev %s is too small (%"PRIu32")\n",
			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
		return -1;
	}

	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	/* The cache needs to be capable of storing at least two full bands. This requirement comes
	 * from the fact that cache works as a protection against power loss, so before the data
	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
	 * extra block is needed to store the header.
	 */
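	/* For instance (illustrative numbers, not a requirement of the code): with
	 * 1 GiB bands and 4 KiB blocks a band spans 262144 blocks, so the cache bdev
	 * must provide at least 2 * 262144 + 1 = 524289 blocks.
	 */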
	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
		return -1;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		return -1;
	}

	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
						spdk_bdev_get_md_size(bdev) *
						conf->nv_cache.max_request_size,
						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
						SPDK_ENV_SOCKET_ID_ANY);
	if (!nv_cache->md_pool) {
		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
		return -1;
	}

	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
	if (!nv_cache->dma_buf) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -1;
	}

	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
		SPDK_ERRLOG("Failed to initialize cache lock\n");
		return -1;
	}

	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
	nv_cache->num_available = nv_cache->num_data_blocks;
	nv_cache->ready = false;

	return 0;
}

void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}
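
/*
 * Illustrative usage sketch (not part of the library): callers are expected to
 * initialize the configuration with defaults before overriding selected fields,
 * e.g.:
 *
 *	struct spdk_ftl_conf conf;
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	conf.lba_rsvd = 25;
 */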

static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
}

static int
ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
{
	char pool_name[128];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		SPDK_ERRLOG("Failed to create media pool name\n");
		return -1;
	}

	dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
				 sizeof(struct ftl_media_event),
				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				 SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->media_events_pool) {
		SPDK_ERRLOG("Failed to create media events pool\n");
		return -1;
	}

	return 0;
}

static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for the band close / open sequence
	 * alone, plus additional (8) buffers for handling write errors.
	 * TODO: This memory pool is utilized only by the core thread - it introduces
	 * unnecessary overhead and should be replaced by a different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
				sizeof(struct ftl_lba_map_request),
				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				SPDK_ENV_SOCKET_ID_ANY,
				ftl_lba_map_request_ctor,
				dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}

static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
	LIST_INIT(&dev->band_flush_list);
}

static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	size_t seq = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->seq > seq) {
			seq = band->seq;
		}
	}

	return seq;
}

static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/* Clear the limit applications as they're incremented incorrectly by */
	/* the initialization code */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}

static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	int cnt = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->num_zones && !band->lba_map.num_vld) {
			cnt++;
		}
	}
	return cnt;
}

static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose a read-only device */
	/* if there are no free bands? */
	/* If we abort initialization, should we depend on the condition that */
	/* we have no free bands, or should we require some minimal number of */
	/* free bands? */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}

static void
_ftl_dev_init_core_thread(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	dev->core_poller = spdk_poller_register(ftl_task_core, dev, 0);
	if (!dev->core_poller) {
		SPDK_ERRLOG("Unable to register core poller\n");
		assert(0);
	}

	dev->ioch = spdk_get_io_channel(dev);
}

static int
ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread) {
		return -1;
	}

	dev->core_thread = opts->core_thread;

	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
	return 0;
}

static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size;

	if (dev->num_lbas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
		return -1;
	}

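	/* Addresses that don't fit in 32 bits need 8-byte L2P entries. For instance
	 * (illustrative numbers), a base bdev with 2^28 blocks yields addr_len == 29,
	 * so 4-byte entries suffice.
	 */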
	addr_size = dev->addr_len >= 32 ? 8 : 4;
	dev->l2p = malloc(dev->num_lbas * addr_size);
	if (!dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
		return -1;
	}

	memset(dev->l2p, FTL_ADDR_INVALID, dev->num_lbas * addr_size);

	return 0;
}

static void
ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
{
	if (!init_ctx) {
		return;
	}

	if (init_ctx->ioch) {
		spdk_put_io_channel(init_ctx->ioch);
	}

	free(init_ctx);
}

static void
ftl_call_init_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static void
ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
}

static void
ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static int _spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
			      struct spdk_thread *thread);

static void
ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
{
	if (_spdk_ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}

static void
ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
		ftl_init_fail(init_ctx);
		return;
	}

	dev->nv_cache.ready = true;
	ftl_init_complete(init_ctx);
}

static void
ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
		ftl_init_fail(init_ctx);
		return;
	}

	nv_cache->phase = 1;
	if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		ftl_init_fail(init_ctx);
	}
}

static void
_ftl_nv_cache_scrub(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	int rc;

	rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
			    spdk_strerror(-rc));
		ftl_init_fail(init_ctx);
	}
}

static int
ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
	}

	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
	} else {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
	}

	return 0;
}

static void
ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (spdk_unlikely(status != 0)) {
		SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
		ftl_init_fail(init_ctx);
		return;
	}

	ftl_init_complete(init_ctx);
}

static void
ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
		return;
	}

	ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
}

static void
ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(init_ctx->dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(init_ctx);
}

static int
ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	dev->uuid = init_ctx->opts.uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}

static void
ftl_dev_update_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;
	size_t i;

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->tail_md_addr = ftl_band_tail_md_addr(band);
	}

	/* Remove band from shut_bands list to prevent further processing */
	/* if all blocks on this band are bad */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_zones) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}

static void
ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	ftl_dev_update_bands(dev);

	if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
		SPDK_ERRLOG("Unable to initialize device thread\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(init_ctx)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			ftl_init_fail(init_ctx);
			return;
		}
	} else {
		if (ftl_restore_state(init_ctx)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			ftl_init_fail(init_ctx);
			return;
		}
	}
}

static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);

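/*
 * Zone info is queried in batches of up to FTL_ZONE_INFO_COUNT zones. zone_id is
 * expressed as a block offset, so after each batch it advances by the number of
 * zones read multiplied by the number of blocks in a zone.
 */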
static void
ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_band *band;
	struct ftl_zone *zone;
	struct ftl_addr addr;
	size_t i, zones_left, num_zones;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
		return;
	}

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	for (i = 0; i < num_zones; ++i) {
		addr.offset = init_ctx->info[i].zone_id;
		band = &dev->bands[ftl_addr_get_band(dev, addr)];
		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
		zone->info = init_ctx->info[i];

		/* TODO: add support for zone capacity less than zone size */
		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
				    "zone id: %"PRIu64"\n", init_ctx->zone_id);
		}

		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
			band->num_zones++;
			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
		}
	}

	init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);

	ftl_dev_get_zone_info(init_ctx);
}

static void
ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	size_t zones_left, num_zones;
	int rc;

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	if (zones_left == 0) {
		ftl_dev_init_state(init_ctx);
		return;
	}

	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
				     init_ctx->zone_id, num_zones, init_ctx->info,
				     ftl_dev_get_zone_info_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
	}
}

static int
ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	init_ctx->zone_id = 0;
	init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!init_ctx->ioch) {
		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
		return -1;
	}

	ftl_dev_get_zone_info(init_ctx);

	return 0;
}

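/*
 * The per-thread context registered with the io_device holds just a pointer to
 * the actual channel, so the channel itself can outlive the framework-owned
 * context until all of its entries are returned (see _ftl_io_channel_destroy_cb()).
 */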
struct _ftl_io_channel {
	struct ftl_io_channel *ioch;
};

struct ftl_io_channel *
ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
{
	struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);

	return _ioch->ioch;
}

static void
ftl_io_channel_register(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t ioch_index;

	for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
		if (dev->ioch_array[ioch_index] == NULL) {
			dev->ioch_array[ioch_index] = ioch;
			ioch->index = ioch_index;
			break;
		}
	}

	assert(ioch_index < dev->conf.max_io_channels);
	TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
}

static int
ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
{
	struct spdk_ftl_dev *dev = ioch->dev;
	struct ftl_wbuf_entry *entry;
	uint32_t i;
	int rc;

	ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
	ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
	if (ioch->wbuf_entries == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
		return -1;
	}

	ioch->qdepth_limit = ioch->num_entries;
	ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
					  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (ioch->wbuf_payload == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer payload\n");
		goto error_entries;
	}

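	/* An spdk_ring (backed by a DPDK-style ring) can hold one element fewer than
	 * its size, hence the size is rounded up to a power of two that fits
	 * num_entries + 1 elements.
	 */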
	ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					    spdk_align32pow2(ioch->num_entries + 1),
					    SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->free_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate free queue\n");
		goto error_payload;
	}

	ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					      spdk_align32pow2(ioch->num_entries + 1),
					      SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->submit_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate submit queue\n");
		goto error_free_queue;
	}

	for (i = 0; i < ioch->num_entries; ++i) {
		entry = &ioch->wbuf_entries[i];
		entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
		entry->ioch = ioch;
		entry->index = i;
		entry->addr.offset = FTL_ADDR_INVALID;

		rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to initialize spinlock\n");
			goto error_spinlock;
		}

		spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
	}

	return 0;
error_spinlock:
	for (; i > 0; --i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
	}

	spdk_ring_free(ioch->submit_queue);
error_free_queue:
	spdk_ring_free(ioch->free_queue);
error_payload:
	spdk_free(ioch->wbuf_payload);
error_entries:
	free(ioch->wbuf_entries);

	return -1;
}

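/*
 * Called on the thread that requested the channel. Allocates all per-channel
 * resources, then registers the channel with the core thread. The atomic
 * counter guards against exceeding conf.max_io_channels across threads.
 */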
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch;
	uint32_t num_io_channels;
	char mempool_name[32];
	int rc;

	num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	if (num_io_channels >= dev->conf.max_io_channels) {
		SPDK_ERRLOG("Reached maximum number of IO channels\n");
		__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
		return -1;
	}

	ioch = calloc(1, sizeof(*ioch));
	if (ioch == NULL) {
		SPDK_ERRLOG("Failed to allocate IO channel\n");
		return -1;
	}

	rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
		SPDK_ERRLOG("Failed to create IO channel pool name\n");
		free(ioch);
		return -1;
	}

	ioch->cache_ioch = NULL;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		free(ioch);
		return -1;
	}

	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!ioch->base_ioch) {
		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
		goto fail_ioch;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			goto fail_cache;
		}
	}

	TAILQ_INIT(&ioch->write_cmpl_queue);
	TAILQ_INIT(&ioch->retry_queue);
	ioch->poller = spdk_poller_register(ftl_io_channel_poll, ioch, 0);
	if (!ioch->poller) {
		SPDK_ERRLOG("Failed to register IO channel poller\n");
		goto fail_poller;
	}

	if (ftl_io_channel_init_wbuf(ioch)) {
		SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
		goto fail_wbuf;
	}

	_ioch->ioch = ioch;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);

	return 0;
fail_wbuf:
	spdk_poller_unregister(&ioch->poller);
fail_poller:
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}
fail_cache:
	spdk_put_io_channel(ioch->base_ioch);
fail_ioch:
	spdk_mempool_free(ioch->io_pool);
	free(ioch);

	return -1;
}

static void
ftl_io_channel_unregister(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t i, num_io_channels __attribute__((unused));

	assert(ioch->index < dev->conf.max_io_channels);
	assert(dev->ioch_array[ioch->index] == ioch);

	dev->ioch_array[ioch->index] = NULL;
	TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);

	num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	assert(num_io_channels > 0);

	for (i = 0; i < ioch->num_entries; ++i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
	}

	spdk_mempool_free(ioch->io_pool);
	spdk_ring_free(ioch->free_queue);
	spdk_ring_free(ioch->submit_queue);
	spdk_free(ioch->wbuf_payload);
	free(ioch->wbuf_entries);
	free(ioch);
}

static void
_ftl_io_channel_destroy_cb(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;

	/* Do not destroy the channel if some of its entries are still in use */
	if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
		spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
		return;
	}

	spdk_poller_unregister(&ioch->poller);

	spdk_put_io_channel(ioch->base_ioch);
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}

	ioch->base_ioch = NULL;
	ioch->cache_ioch = NULL;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
}

static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch = _ioch->ioch;

	/* Mark the IO channel as being flushed to force out any unwritten entries */
	ioch->flush = true;

	_ftl_io_channel_destroy_cb(ioch);
}

static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	struct ftl_batch *batch;
	uint32_t i;

	/* Align the number of IO channels to the nearest power of 2 to allow for easy
	 * address bit shifts */
	dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
	dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);

	dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
	if (!dev->ioch_array) {
		SPDK_ERRLOG("Failed to allocate IO channel array\n");
		return -1;
	}

	if (dev->md_size > 0) {
		dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
					   dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
					   SPDK_MALLOC_DMA);
		if (dev->md_buf == NULL) {
			SPDK_ERRLOG("Failed to allocate metadata buffer\n");
			return -1;
		}
	}

	dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
	if (!dev->iov_buf) {
		SPDK_ERRLOG("Failed to allocate iovec buffer\n");
		return -1;
	}

	TAILQ_INIT(&dev->free_batches);
	TAILQ_INIT(&dev->pending_batches);
	TAILQ_INIT(&dev->ioch_queue);

	for (i = 0; i < FTL_BATCH_COUNT; ++i) {
		batch = &dev->batch_array[i];
		batch->iov = &dev->iov_buf[i * dev->xfer_size];
		batch->num_entries = 0;
		batch->index = i;
		TAILQ_INIT(&batch->entries);
		if (dev->md_buf != NULL) {
			batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
		}

		TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
	}

	dev->num_io_channels = 0;

	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct _ftl_io_channel),
				NULL);

	return 0;
}

static int
ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	uint32_t block_size;
	uint64_t num_blocks;
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (!spdk_bdev_is_zoned(bdev)) {
		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &dev->base_bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(dev->base_bdev_desc);
		dev->base_bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
	dev->md_size = spdk_bdev_get_md_size(bdev);

	block_size = spdk_bdev_get_block_size(bdev);
	if (block_size != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
		return -1;
	}

	num_blocks = spdk_bdev_get_num_blocks(bdev);
	if (num_blocks % ftl_get_num_punits(dev)) {
		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be multiple "
			    "of optimal number of zones.\n");
		return -1;
	}

	if (ftl_is_append_supported(dev) &&
	    !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
	dev->addr_len = spdk_u64log2(num_blocks) + 1;

	return 0;
}

static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}

static void
ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
{
	if (!bdev_desc) {
		return;
	}

	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
	spdk_bdev_close(bdev_desc);
}

static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	assert(LIST_EMPTY(&dev->wptr_list));
	assert(dev->current_batch == NULL);

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	spdk_io_device_unregister(dev, NULL);

	if (dev->bands) {
		for (i = 0; i < ftl_get_num_bands(dev); ++i) {
			free(dev->bands[i].zone_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
			spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
		}
	}

	spdk_dma_free(dev->nv_cache.dma_buf);

	spdk_mempool_free(dev->lba_pool);
	spdk_mempool_free(dev->nv_cache.md_pool);
	spdk_mempool_free(dev->media_events_pool);
	if (dev->lba_request_pool) {
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_reloc_free(dev->reloc);

	ftl_release_bdev(dev->nv_cache.bdev_desc);
	ftl_release_bdev(dev->base_bdev_desc);

	spdk_free(dev->md_buf);

	assert(dev->num_io_channels == 0);
	free(dev->ioch_array);
	free(dev->iov_buf);
	free(dev->name);
	free(dev->bands);
	free(dev->l2p);
	free(dev);
}

int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;
	struct ftl_dev_init_ctx *init_ctx = NULL;
	int rc = -ENOMEM;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	init_ctx = calloc(1, sizeof(*init_ctx));
	if (!init_ctx) {
		goto fail_sync;
	}

	init_ctx->dev = dev;
	init_ctx->opts = *_opts;
	init_ctx->cb_fn = cb_fn;
	init_ctx->cb_arg = cb_arg;
	init_ctx->thread = spdk_get_thread();

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	if (!opts.base_bdev) {
		SPDK_ERRLOG("No underlying device specified in the configuration\n");
		rc = -EINVAL;
		goto fail_sync;
	}

	dev->conf = *opts.conf;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
		SPDK_ERRLOG("Unsupported underlying device\n");
		goto fail_sync;
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
	/* so we don't have to clean up in each of the init functions. */
	if (ftl_check_conf(dev, opts.conf)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	if (ftl_init_media_events_pool(dev)) {
		SPDK_ERRLOG("Unable to init media events pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	if (ftl_dev_init_zones(init_ctx)) {
		SPDK_ERRLOG("Failed to initialize zones\n");
		goto fail_async;
	}

	return 0;
fail_sync:
	ftl_dev_free_sync(dev);
	ftl_dev_free_init_ctx(init_ctx);
	return rc;
fail_async:
	ftl_init_fail(init_ctx);
	return 0;
}
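
/*
 * Illustrative usage sketch (bdev names are placeholders): initialization is
 * asynchronous and the callback fires once the device is ready or has failed.
 *
 *	static void
 *	init_cb(struct spdk_ftl_dev *dev, void *cb_arg, int status)
 *	{
 *		...
 *	}
 *
 *	struct spdk_ftl_dev_init_opts opts = {
 *		.base_bdev = "nvme0n1",
 *		.name = "ftl0",
 *		.mode = SPDK_FTL_MODE_CREATE,
 *		.core_thread = spdk_get_thread(),
 *	};
 *
 *	spdk_ftl_dev_init(&opts, init_cb, NULL);
 */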

static void
_ftl_halt_defrag(void *arg)
{
	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
}

static void
ftl_halt_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	/* Make sure core IO channel has already been released */
	if (dev->num_io_channels > 0) {
		spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
		return;
	}

	ftl_dev_free_sync(fini_ctx->dev);
	if (fini_ctx->cb_fn != NULL) {
		fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
	}

	ftl_dev_free_init_ctx(fini_ctx);
}

static void
ftl_put_io_channel_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	spdk_put_io_channel(dev->ioch);
	spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
}

static void
ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *fini_ctx = cb_arg;
	int rc = 0;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
		rc = -EIO;
	}

	fini_ctx->halt_complete_status = rc;
	spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
}

static int
ftl_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	if (!dev->core_poller) {
		spdk_poller_unregister(&fini_ctx->poller);

		if (ftl_dev_has_nv_cache(dev)) {
			ftl_nv_cache_write_header(&dev->nv_cache, true,
						  ftl_nv_cache_header_fini_cb, fini_ctx);
		} else {
			fini_ctx->halt_complete_status = 0;
			spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
		}
	}

	return 0;
}

static void
ftl_add_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	dev->halt = 1;

	_ftl_halt_defrag(dev);

	assert(!fini_ctx->poller);
	fini_ctx->poller = spdk_poller_register(ftl_halt_poller, fini_ctx, 100);
}

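/*
 * Teardown sequence: the halt flag is set on the core thread, the halt poller
 * waits until the core poller has stopped (dev->core_poller becomes NULL), the
 * non-volatile cache header is written out (if present), the core IO channel is
 * released and finally the device is freed and the user's callback invoked.
 */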
static int
_spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
		   struct spdk_thread *thread)
{
	struct ftl_dev_init_ctx *fini_ctx;

	if (dev->halt_started) {
		return -EBUSY;
	}

	fini_ctx = calloc(1, sizeof(*fini_ctx));
	if (!fini_ctx) {
		return -ENOMEM;
	}

	dev->halt_started = true;

	fini_ctx->dev = dev;
	fini_ctx->cb_fn = cb_fn;
	fini_ctx->cb_arg = cb_arg;
	fini_ctx->thread = thread;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
	return 0;
}

int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	return _spdk_ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
}

SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)