/* xref: /spdk/lib/ftl/ftl_init.c (revision 94a84ae98590bea46939eb1dcd7a9876bd393b54) */
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/bdev_zone.h"
#include "spdk/bdev_module.h"
#include "spdk/config.h"

#include "ftl_core.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_band.h"
#include "ftl_debug.h"

#ifdef SPDK_CONFIG_PMDK
#include "libpmem.h"
#endif /* SPDK_CONFIG_PMDK */

#define FTL_CORE_RING_SIZE	4096
#define FTL_INIT_TIMEOUT	30
#define FTL_NSID		1
#define FTL_ZONE_INFO_COUNT	64

/* Dummy bdev module used to claim bdevs. */
static struct spdk_bdev_module g_ftl_bdev_module = {
	.name	= "ftl_lib",
};

typedef void (*spdk_ftl_init_fn)(struct spdk_ftl_dev *, void *, int);

struct ftl_dev_init_ctx {
	/* Owner */
	struct spdk_ftl_dev		*dev;
	/* Initial arguments */
	struct spdk_ftl_dev_init_opts	opts;
	/* IO channel for retrieving zone info */
	struct spdk_io_channel		*ioch;
	/* Buffer for reading zone info */
	struct spdk_bdev_zone_info	info[FTL_ZONE_INFO_COUNT];
	/* Currently read zone */
	size_t				zone_id;
	/* User's callback */
	spdk_ftl_init_fn		cb_fn;
	/* Callback's argument */
	void				*cb_arg;
	/* Thread to call the callback on */
	struct spdk_thread		*thread;
	/* Poller to check if the device has been destroyed/initialized */
	struct spdk_poller		*poller;
	/* Status to return for halt completion callback */
	int				halt_complete_status;
};

static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct spdk_ftl_conf	g_default_conf = {
	.limits = {
		/* 5 free bands / 0% host writes */
		[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
		/* 10 free bands / 5% host writes */
		[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
		/* 20 free bands / 40% host writes */
		[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
		/* 40 free bands / 100% host writes - defrag starts running */
		[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
	},
	/* 10 percent valid blocks */
	.invalid_thld = 10,
	/* 20% spare blocks */
	.lba_rsvd = 20,
	/* 6M write buffer per each IO channel */
	.write_buffer_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/*
	 * If clear, FTL will return an error when restoring after a dirty shutdown.
	 * If set, the last band will be padded and FTL will restore based only on
	 * closed bands - any data that was in open bands is lost after recovery.
	 */
	.allow_open_bands = false,
	.max_io_channels = 128,
	.nv_cache = {
		/* Maximum number of concurrent requests */
		.max_request_cnt = 2048,
		/* Maximum number of blocks per request */
		.max_request_size = 16,
	}
};

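/*
 * Reading the limits table above: each entry pairs a free-band threshold
 * (thld) with the share of write bandwidth left to the user (limit). For
 * example, once the pool drops to 10 free bands (SPDK_FTL_LIMIT_HIGH), user
 * writes are throttled to 5% so relocation can reclaim bands faster; at 5
 * free bands (SPDK_FTL_LIMIT_CRIT) user writes are stopped entirely.
 */
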
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;

	lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);
	return 0;
}

static int
ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
{
	size_t i;

	if (conf->invalid_thld >= 100) {
		return -1;
	}
	if (conf->lba_rsvd >= 100) {
		return -1;
	}
	if (conf->lba_rsvd == 0) {
		return -1;
	}
	if (conf->write_buffer_size == 0) {
		return -1;
	}
	if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
		return -1;
	}

	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
		if (conf->limits[i].limit > 100) {
			return -1;
		}
	}

	return 0;
}

static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *pband;
	unsigned int i;
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->zones);
		band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
		if (!band->zone_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			break;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			break;
		}

		band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
		if (!band->reloc_bitmap) {
			SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
			rc = -1;
			break;
		}
	}

	return rc;
}

static void
ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_ftl_dev *dev = event_ctx;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		assert(0);
		break;
	case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
		assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
		ftl_get_media_events(dev);
	/* fallthrough */
	default:
		break;
	}
}

static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	struct spdk_bdev *bdev;
	struct spdk_ftl_conf *conf = &dev->conf;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	char pool_name[128];
	int rc;

	if (!bdev_name) {
		return 0;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &nv_cache->bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(nv_cache->bdev_desc);
		nv_cache->bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
		     spdk_bdev_get_name(bdev));

	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
		return -1;
	}

	if (!spdk_bdev_is_md_separate(bdev)) {
		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
		return -1;
	}

	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
		SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	/* The cache needs to be capable of storing at least two full bands. This requirement comes
	 * from the fact that cache works as a protection against power loss, so before the data
	 * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
	 * extra block is needed to store the header.
	 */
	if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
			    PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
			    ftl_get_num_blocks_in_band(dev) * 2 + 1);
		return -1;
	}
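
	/*
	 * Example with hypothetical geometry: with 4KiB blocks and bands of
	 * 65536 blocks (256MiB), the cache bdev must provide at least
	 * 2 * 65536 + 1 = 131073 blocks, i.e. 512MiB plus one header block.
	 */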

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		return -1;
	}

	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
						spdk_bdev_get_md_size(bdev) *
						conf->nv_cache.max_request_size,
						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
						SPDK_ENV_SOCKET_ID_ANY);
	if (!nv_cache->md_pool) {
		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
		return -1;
	}

	nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
	if (!nv_cache->dma_buf) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -1;
	}

	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
		SPDK_ERRLOG("Failed to initialize cache lock\n");
		return -1;
	}

	nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
	nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
	nv_cache->num_available = nv_cache->num_data_blocks;
	nv_cache->ready = false;

	return 0;
}

void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}
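
/*
 * A minimal usage sketch (illustrative, not part of this file): callers
 * typically grab the defaults, tweak individual fields and pass the result
 * to spdk_ftl_dev_init() through spdk_ftl_dev_init_opts::conf:
 *
 *	struct spdk_ftl_conf conf;
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	conf.lba_rsvd = 25;		// reserve 25% of blocks instead of 20%
 *	conf.allow_open_bands = true;
 *	opts.conf = &conf;		// opts is the spdk_ftl_dev_init_opts being filled
 */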

static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
}

static int
ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
{
	char pool_name[128];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
		SPDK_ERRLOG("Failed to create media pool name\n");
		return -1;
	}

	dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
				 sizeof(struct ftl_media_event),
				 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				 SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->media_events_pool) {
		SPDK_ERRLOG("Failed to create media events pool\n");
		return -1;
	}

	return 0;
}

static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for the band close / open sequence
	 * alone, plus an additional 8 buffers for handling write errors.
	 * TODO: This memory pool is utilized only by the core thread - it introduces
	 * unnecessary overhead and should be replaced by a different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
				sizeof(struct ftl_lba_map_request),
				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				SPDK_ENV_SOCKET_ID_ANY,
				ftl_lba_map_request_ctor,
				dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}

static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
	LIST_INIT(&dev->band_flush_list);
}

static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	size_t seq = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->seq > seq) {
			seq = band->seq;
		}
	}

	return seq;
}

static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/* Clear the limit applications as they're incremented incorrectly by
	 * the initialization code */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}

static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	int cnt = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->num_zones && !band->lba_map.num_vld) {
			cnt++;
		}
	}
	return cnt;
}

static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose a read-only device
	 * if there are no free bands? If we abort initialization, should it
	 * depend on there being no free bands at all, or should some minimal
	 * number of free bands be required?
	 */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}

static void
_ftl_dev_init_core_thread(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	dev->core_poller = spdk_poller_register(ftl_task_core, dev, 0);
	if (!dev->core_poller) {
		SPDK_ERRLOG("Unable to register core poller\n");
		assert(0);
	}

	dev->ioch = spdk_get_io_channel(dev);
}

static int
ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread) {
		return -1;
	}

	dev->core_thread = opts->core_thread;

	spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
	return 0;
}

static int
ftl_dev_l2p_alloc_pmem(struct spdk_ftl_dev *dev, size_t l2p_size, const char *l2p_path)
{
#ifdef SPDK_CONFIG_PMDK
	int is_pmem;

	if ((dev->l2p = pmem_map_file(l2p_path, 0,
				      0, 0, &dev->l2p_pmem_len, &is_pmem)) == NULL) {
		SPDK_ERRLOG("Failed to mmap l2p_path\n");
		return -1;
	}

	if (!is_pmem) {
		SPDK_NOTICELOG("l2p_path mapped on non-pmem device\n");
	}

	if (dev->l2p_pmem_len < l2p_size) {
		SPDK_ERRLOG("l2p_path file is too small\n");
		return -1;
	}

	pmem_memset_persist(dev->l2p, FTL_ADDR_INVALID, l2p_size);

	return 0;
#else /* SPDK_CONFIG_PMDK */
	SPDK_ERRLOG("Libpmem not available, cannot use pmem l2p_path\n");
	return -1;
#endif /* SPDK_CONFIG_PMDK */
}
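
/*
 * Since pmem_map_file() is called with len == 0 and no PMEM_FILE_CREATE flag,
 * the file at l2p_path must already exist and be at least l2p_size bytes
 * before initialization, e.g. (illustrative only):
 *
 *	truncate -s 4G /mnt/pmem/ftl_l2p
 */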

static int
ftl_dev_l2p_alloc_dram(struct spdk_ftl_dev *dev, size_t l2p_size)
{
	dev->l2p = malloc(l2p_size);
	if (!dev->l2p) {
		SPDK_ERRLOG("Failed to allocate l2p table\n");
		return -1;
	}

	memset(dev->l2p, FTL_ADDR_INVALID, l2p_size);

	return 0;
}

static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size = dev->addr_len >= 32 ? 8 : 4;
	size_t l2p_size = dev->num_lbas * addr_size;
	const char *l2p_path = dev->conf.l2p_path;

	if (dev->num_lbas == 0) {
		SPDK_ERRLOG("Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_ERRLOG("L2p table already allocated\n");
		return -1;
	}

	dev->l2p_pmem_len = 0;
	if (l2p_path) {
		return ftl_dev_l2p_alloc_pmem(dev, l2p_size, l2p_path);
	} else {
		return ftl_dev_l2p_alloc_dram(dev, l2p_size);
	}
}
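
/*
 * L2P sizing example (hypothetical numbers): a base bdev with 2^30 blocks
 * yields addr_len = spdk_u64log2(2^30) + 1 = 31, so 4-byte entries suffice.
 * With the default 20% spare (lba_rsvd), num_lbas is about 0.8 * 2^30 and
 * the table takes roughly 3.2GiB; devices where addr_len reaches 32 switch
 * to 8-byte entries.
 */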

static void
ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
{
	if (!init_ctx) {
		return;
	}

	if (init_ctx->ioch) {
		spdk_put_io_channel(init_ctx->ioch);
	}

	free(init_ctx);
}

static void
ftl_call_init_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static void
ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
}

static void
ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;

	if (init_ctx->cb_fn != NULL) {
		init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
	}

	ftl_dev_free_init_ctx(init_ctx);
}

static int _spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
			      struct spdk_thread *thread);

static void
ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
{
	if (_spdk_ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}

static void
ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
		ftl_init_fail(init_ctx);
		return;
	}

	dev->nv_cache.ready = true;
	ftl_init_complete(init_ctx);
}

static void
ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
		ftl_init_fail(init_ctx);
		return;
	}

	nv_cache->phase = 1;
	if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		ftl_init_fail(init_ctx);
	}
}

static void
_ftl_nv_cache_scrub(void *ctx)
{
	struct ftl_dev_init_ctx *init_ctx = ctx;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	int rc;

	rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
			    spdk_strerror(-rc));
		ftl_init_fail(init_ctx);
	}
}

static int
ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
	}

	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
	} else {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
	}

	return 0;
}

static void
ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (spdk_unlikely(status != 0)) {
		SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
		ftl_init_fail(init_ctx);
		return;
	}

	ftl_init_complete(init_ctx);
}

static void
ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;

	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (!ftl_dev_has_nv_cache(dev)) {
		ftl_init_complete(init_ctx);
		return;
	}

	ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
}

static void
ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;

	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(init_ctx->dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(init_ctx);
}

static int
ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	dev->uuid = init_ctx->opts.uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}

static void
ftl_dev_update_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;
	size_t i;

	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->tail_md_addr = ftl_band_tail_md_addr(band);
	}

	/* Remove a band from the shut_bands list to prevent further processing
	 * if all blocks on that band are bad */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_zones) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}

static void
ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	ftl_dev_update_bands(dev);

	if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
		SPDK_ERRLOG("Unable to initialize device thread\n");
		ftl_init_fail(init_ctx);
		return;
	}

	if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(init_ctx)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			ftl_init_fail(init_ctx);
			return;
		}
	} else {
		if (ftl_restore_state(init_ctx)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			ftl_init_fail(init_ctx);
			return;
		}
	}
}

static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);

static void
ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *init_ctx = cb_arg;
	struct spdk_ftl_dev *dev = init_ctx->dev;
	struct ftl_band *band;
	struct ftl_zone *zone;
	struct ftl_addr addr;
	size_t i, zones_left, num_zones;

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
		return;
	}

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	for (i = 0; i < num_zones; ++i) {
		addr.offset = init_ctx->info[i].zone_id;
		band = &dev->bands[ftl_addr_get_band(dev, addr)];
		zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
		zone->info = init_ctx->info[i];

		/* TODO: add support for zone capacity less than zone size */
		if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
			zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
				    "zone id: %"PRIu64"\n", init_ctx->zone_id);
		}

		if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
			band->num_zones++;
			CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
		}
	}

	init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);

	ftl_dev_get_zone_info(init_ctx);
}

static void
ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;
	size_t zones_left, num_zones;
	int rc;

	zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
	if (zones_left == 0) {
		ftl_dev_init_state(init_ctx);
		return;
	}

	num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);

	rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
				     init_ctx->zone_id, num_zones, init_ctx->info,
				     ftl_dev_get_zone_info_cb, init_ctx);

	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
		ftl_init_fail(init_ctx);
	}
}
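
/*
 * Zone discovery ping-pongs between ftl_dev_get_zone_info() and its
 * completion callback: every spdk_bdev_get_zone_info() call fetches up to
 * FTL_ZONE_INFO_COUNT (64) zone descriptors, zone_id then advances by
 * num_zones * blocks-per-zone, and the cycle repeats until all zones have
 * been catalogued, at which point ftl_dev_init_state() continues bring-up.
 */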

static int
ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
{
	struct spdk_ftl_dev *dev = init_ctx->dev;

	init_ctx->zone_id = 0;
	init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!init_ctx->ioch) {
		SPDK_ERRLOG("Failed to get base bdev IO channel\n");
		return -1;
	}

	ftl_dev_get_zone_info(init_ctx);

	return 0;
}

struct _ftl_io_channel {
	struct ftl_io_channel *ioch;
};

struct ftl_io_channel *
ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
{
	struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);

	return _ioch->ioch;
}
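
/*
 * The extra indirection above is deliberate: the per-thread spdk_io_channel
 * context stores only a pointer, while the actual ftl_io_channel is
 * allocated separately in ftl_io_channel_create_cb() and registered in the
 * device's ioch_array on the core thread (ftl_io_channel_register() below).
 */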

static void
ftl_io_channel_register(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t ioch_index;

	for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
		if (dev->ioch_array[ioch_index] == NULL) {
			dev->ioch_array[ioch_index] = ioch;
			ioch->index = ioch_index;
			break;
		}
	}

	assert(ioch_index < dev->conf.max_io_channels);
	TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
}

static int
ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
{
	struct spdk_ftl_dev *dev = ioch->dev;
	struct ftl_wbuf_entry *entry;
	uint32_t i;
	int rc;

	ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
	ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
	if (ioch->wbuf_entries == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
		return -1;
	}

	ioch->qdepth_limit = ioch->num_entries;
	ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
					  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (ioch->wbuf_payload == NULL) {
		SPDK_ERRLOG("Failed to allocate write buffer payload\n");
		goto error_entries;
	}

	ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					    spdk_align32pow2(ioch->num_entries + 1),
					    SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->free_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate free queue\n");
		goto error_payload;
	}

	ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
					      spdk_align32pow2(ioch->num_entries + 1),
					      SPDK_ENV_SOCKET_ID_ANY);
	if (ioch->submit_queue == NULL) {
		SPDK_ERRLOG("Failed to allocate submit queue\n");
		goto error_free_queue;
	}

	for (i = 0; i < ioch->num_entries; ++i) {
		entry = &ioch->wbuf_entries[i];
		entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
		entry->ioch = ioch;
		entry->index = i;
		entry->addr.offset = FTL_ADDR_INVALID;

		rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to initialize spinlock\n");
			goto error_spinlock;
		}

		spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
	}

	return 0;
error_spinlock:
	for (; i > 0; --i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
	}

	spdk_ring_free(ioch->submit_queue);
error_free_queue:
	spdk_ring_free(ioch->free_queue);
error_payload:
	spdk_free(ioch->wbuf_payload);
error_entries:
	free(ioch->wbuf_entries);

	return -1;
}
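
/*
 * Write buffer sizing example: with the default write_buffer_size of 6MiB
 * and 4KiB FTL_BLOCK_SIZE each IO channel gets 1536 entries, and the rings
 * are created with spdk_align32pow2(1536 + 1) = 2048 slots, since ring slot
 * counts must be a power of two and a ring typically holds one item fewer
 * than its slot count.
 */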

static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch;
	uint32_t num_io_channels;
	char mempool_name[32];
	int rc;

	num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	if (num_io_channels >= dev->conf.max_io_channels) {
		SPDK_ERRLOG("Reached maximum number of IO channels\n");
		__atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
		return -1;
	}

	ioch = calloc(1, sizeof(*ioch));
	if (ioch == NULL) {
		SPDK_ERRLOG("Failed to allocate IO channel\n");
		return -1;
	}

	rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
		SPDK_ERRLOG("Failed to create IO channel pool name\n");
		free(ioch);
		return -1;
	}

	ioch->cache_ioch = NULL;
	ioch->index = FTL_IO_CHANNEL_INDEX_INVALID;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		free(ioch);
		return -1;
	}

	ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
	if (!ioch->base_ioch) {
		SPDK_ERRLOG("Failed to create base bdev IO channel\n");
		goto fail_ioch;
	}

	if (ftl_dev_has_nv_cache(dev)) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			goto fail_cache;
		}
	}

	TAILQ_INIT(&ioch->write_cmpl_queue);
	TAILQ_INIT(&ioch->retry_queue);
	ioch->poller = spdk_poller_register(ftl_io_channel_poll, ioch, 0);
	if (!ioch->poller) {
		SPDK_ERRLOG("Failed to register IO channel poller\n");
		goto fail_poller;
	}

	if (ftl_io_channel_init_wbuf(ioch)) {
		SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
		goto fail_wbuf;
	}

	_ioch->ioch = ioch;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);

	return 0;
fail_wbuf:
	spdk_poller_unregister(&ioch->poller);
fail_poller:
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}
fail_cache:
	spdk_put_io_channel(ioch->base_ioch);
fail_ioch:
	spdk_mempool_free(ioch->io_pool);
	free(ioch);

	return -1;
}

static void
ftl_io_channel_unregister(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;
	uint32_t i, num_io_channels __attribute__((unused));

	assert(ioch->index < dev->conf.max_io_channels);
	assert(dev->ioch_array[ioch->index] == ioch);

	dev->ioch_array[ioch->index] = NULL;
	TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);

	num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
	assert(num_io_channels > 0);

	for (i = 0; i < ioch->num_entries; ++i) {
		pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
	}

	spdk_mempool_free(ioch->io_pool);
	spdk_ring_free(ioch->free_queue);
	spdk_ring_free(ioch->submit_queue);
	spdk_free(ioch->wbuf_payload);
	free(ioch->wbuf_entries);
	free(ioch);
}

static void
_ftl_io_channel_destroy_cb(void *ctx)
{
	struct ftl_io_channel *ioch = ctx;
	struct spdk_ftl_dev *dev = ioch->dev;

	/* Do not destroy the channel if some of its entries are still in use */
	if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
		spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
		return;
	}

	spdk_poller_unregister(&ioch->poller);

	spdk_put_io_channel(ioch->base_ioch);
	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}

	ioch->base_ioch = NULL;
	ioch->cache_ioch = NULL;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
}

static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
	struct _ftl_io_channel *_ioch = ctx;
	struct ftl_io_channel *ioch = _ioch->ioch;

	/* Mark the IO channel as being flushed to force out any unwritten entries */
	ioch->flush = true;

	_ftl_io_channel_destroy_cb(ioch);
}

static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	struct ftl_batch *batch;
	uint32_t i;

	/* Align the IO channels to nearest power of 2 to allow for easy addr bit shift */
	dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
	dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);
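
	/*
	 * E.g. a requested max_io_channels of 100 rounds up to 128 and
	 * ioch_shift becomes 7, so a channel index can live in a fixed-width
	 * bit field of a cached address.
	 */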

	dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
	if (!dev->ioch_array) {
		SPDK_ERRLOG("Failed to allocate IO channel array\n");
		return -1;
	}

	if (dev->md_size > 0) {
		dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
					   dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
					   SPDK_MALLOC_DMA);
		if (dev->md_buf == NULL) {
			SPDK_ERRLOG("Failed to allocate metadata buffer\n");
			return -1;
		}
	}

	dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
	if (!dev->iov_buf) {
		SPDK_ERRLOG("Failed to allocate iovec buffer\n");
		return -1;
	}

	TAILQ_INIT(&dev->free_batches);
	TAILQ_INIT(&dev->pending_batches);
	TAILQ_INIT(&dev->ioch_queue);

	for (i = 0; i < FTL_BATCH_COUNT; ++i) {
		batch = &dev->batch_array[i];
		batch->iov = &dev->iov_buf[i * dev->xfer_size];
		batch->num_entries = 0;
		batch->index = i;
		TAILQ_INIT(&batch->entries);
		if (dev->md_buf != NULL) {
			batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
		}

		TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
	}

	dev->num_io_channels = 0;

	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct _ftl_io_channel),
				NULL);

	return 0;
}

static int
ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
{
	uint32_t block_size;
	uint64_t num_blocks;
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
		return -1;
	}

	if (!spdk_bdev_is_zoned(bdev)) {
		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
			       dev, &dev->base_bdev_desc)) {
		SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
		return -1;
	}

	if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
		spdk_bdev_close(dev->base_bdev_desc);
		dev->base_bdev_desc = NULL;
		SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
		return -1;
	}

	dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
	dev->md_size = spdk_bdev_get_md_size(bdev);

	block_size = spdk_bdev_get_block_size(bdev);
	if (block_size != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
		return -1;
	}

	num_blocks = spdk_bdev_get_num_blocks(bdev);
	if (num_blocks % ftl_get_num_punits(dev)) {
		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be multiple "
			    "of optimal number of zones.\n");
		return -1;
	}

	if (ftl_is_append_supported(dev) &&
	    !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
			    spdk_bdev_get_name(bdev));
		return -1;
	}

	dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
	dev->addr_len = spdk_u64log2(num_blocks) + 1;

	return 0;
}

static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}

static void
ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
{
	if (!bdev_desc) {
		return;
	}

	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
	spdk_bdev_close(bdev_desc);
}

static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	assert(LIST_EMPTY(&dev->wptr_list));
	assert(dev->current_batch == NULL);

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->bands) {
		for (i = 0; i < ftl_get_num_bands(dev); ++i) {
			free(dev->bands[i].zone_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
			spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
		}
	}

	spdk_dma_free(dev->nv_cache.dma_buf);

	spdk_mempool_free(dev->lba_pool);
	spdk_mempool_free(dev->nv_cache.md_pool);
	spdk_mempool_free(dev->media_events_pool);
	if (dev->lba_request_pool) {
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_reloc_free(dev->reloc);

	ftl_release_bdev(dev->nv_cache.bdev_desc);
	ftl_release_bdev(dev->base_bdev_desc);

	spdk_free(dev->md_buf);

	assert(dev->num_io_channels == 0);
	free(dev->ioch_array);
	free(dev->iov_buf);
	free(dev->name);
	free(dev->bands);
	if (dev->l2p_pmem_len != 0) {
#ifdef SPDK_CONFIG_PMDK
		pmem_unmap(dev->l2p, dev->l2p_pmem_len);
#endif /* SPDK_CONFIG_PMDK */
	} else {
		free(dev->l2p);
	}
	free((char *)dev->conf.l2p_path);
	free(dev);
}

int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;
	struct ftl_dev_init_ctx *init_ctx = NULL;
	int rc = -ENOMEM;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	init_ctx = calloc(1, sizeof(*init_ctx));
	if (!init_ctx) {
		goto fail_sync;
	}

	init_ctx->dev = dev;
	init_ctx->opts = *_opts;
	init_ctx->cb_fn = cb_fn;
	init_ctx->cb_arg = cb_arg;
	init_ctx->thread = spdk_get_thread();

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	if (!opts.base_bdev) {
		SPDK_ERRLOG("Underlying device not specified in the configuration\n");
		rc = -EINVAL;
		goto fail_sync;
	}

	dev->conf = *opts.conf;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
		SPDK_ERRLOG("Unsupported underlying device\n");
		goto fail_sync;
	}

	if (opts.conf->l2p_path) {
		dev->conf.l2p_path = strdup(opts.conf->l2p_path);
		if (!dev->conf.l2p_path) {
			rc = -ENOMEM;
			goto fail_sync;
		}
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(),
	 * so we don't have to clean up in each of the init functions. */
	if (ftl_check_conf(dev, opts.conf)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	if (ftl_init_media_events_pool(dev)) {
		SPDK_ERRLOG("Unable to init media events pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	if (ftl_dev_init_zones(init_ctx)) {
		SPDK_ERRLOG("Failed to initialize zones\n");
		goto fail_async;
	}

	return 0;
fail_sync:
	ftl_dev_free_sync(dev);
	ftl_dev_free_init_ctx(init_ctx);
	return rc;
fail_async:
	ftl_init_fail(init_ctx);
	return 0;
}
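
/*
 * Typical call flow (illustrative sketch, not part of this file): fill in
 * spdk_ftl_dev_init_opts and let the completion callback report the result.
 *
 *	static void
 *	init_cb(struct spdk_ftl_dev *dev, void *cb_arg, int status)
 *	{
 *		if (status) {
 *			SPDK_ERRLOG("FTL device initialization failed\n");
 *			return;
 *		}
 *		// the device is ready for IO here
 *	}
 *
 *	struct spdk_ftl_dev_init_opts opts = {
 *		.name = "ftl0",
 *		.base_bdev = "nvme0n1",		// zoned base bdev
 *		.cache_bdev = "pmem0",		// optional write buffer cache
 *		.conf = NULL,			// NULL selects g_default_conf
 *		.core_thread = spdk_get_thread(),
 *		.mode = SPDK_FTL_MODE_CREATE,	// omit to restore an existing device
 *	};
 *
 *	spdk_ftl_dev_init(&opts, init_cb, NULL);
 */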

static void
_ftl_halt_defrag(void *arg)
{
	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
}

static void
ftl_halt_complete_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	/* Make sure core IO channel has already been released */
	if (dev->num_io_channels > 0) {
		spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
		return;
	}

	spdk_io_device_unregister(fini_ctx->dev, NULL);

	ftl_dev_free_sync(fini_ctx->dev);
	if (fini_ctx->cb_fn != NULL) {
		fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
	}

	ftl_dev_free_init_ctx(fini_ctx);
}

static void
ftl_put_io_channel_cb(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	spdk_put_io_channel(dev->ioch);
	spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
}

static void
ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_dev_init_ctx *fini_ctx = cb_arg;
	int rc = 0;

	spdk_bdev_free_io(bdev_io);
	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
		rc = -EIO;
	}

	fini_ctx->halt_complete_status = rc;
	spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
}

static int
ftl_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	if (!dev->core_poller) {
		spdk_poller_unregister(&fini_ctx->poller);

		if (ftl_dev_has_nv_cache(dev)) {
			ftl_nv_cache_write_header(&dev->nv_cache, true,
						  ftl_nv_cache_header_fini_cb, fini_ctx);
		} else {
			fini_ctx->halt_complete_status = 0;
			spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
		}
	}

	return 0;
}

static void
ftl_add_halt_poller(void *ctx)
{
	struct ftl_dev_init_ctx *fini_ctx = ctx;
	struct spdk_ftl_dev *dev = fini_ctx->dev;

	dev->halt = 1;

	_ftl_halt_defrag(dev);

	assert(!fini_ctx->poller);
	fini_ctx->poller = spdk_poller_register(ftl_halt_poller, fini_ctx, 100);
}
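
/*
 * Teardown sequence: _spdk_ftl_dev_free() below schedules ftl_add_halt_poller()
 * on the core thread, which raises dev->halt, halts relocation and registers
 * ftl_halt_poller(). Once the core poller has stopped, the non-volatile cache
 * header is persisted (if a cache is present), the core IO channel is
 * released, the IO device is unregistered and the remaining resources are
 * freed in ftl_dev_free_sync().
 */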

static int
_spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
		   struct spdk_thread *thread)
{
	struct ftl_dev_init_ctx *fini_ctx;

	if (dev->halt_started) {
		return -EBUSY;
	}

	fini_ctx = calloc(1, sizeof(*fini_ctx));
	if (!fini_ctx) {
		return -ENOMEM;
	}

	dev->halt_started = true;

	fini_ctx->dev = dev;
	fini_ctx->cb_fn = cb_fn;
	fini_ctx->cb_arg = cb_arg;
	fini_ctx->thread = thread;

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
	return 0;
}

int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
{
	return _spdk_ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
}

SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)