xref: /spdk/lib/ftl/ftl_init.c (revision 73204fe2e526f20c3fe7ea2066c0a4ed9dc60272)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "ftl_core.h"
#include "ftl_anm.h"
#include "ftl_io.h"
#include "ftl_reloc.h"
#include "ftl_rwb.h"
#include "ftl_band.h"
#include "ftl_debug.h"

#define FTL_CORE_RING_SIZE	4096
#define FTL_INIT_TIMEOUT	30
#define FTL_NSID		1

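/*
 * True when the closed punit intervals [s1, e1] and [s2, e2] overlap; ranges
 * that merely touch still intersect. For example, ftl_range_intersect(0, 7,
 * 7, 15) is true because punit 7 belongs to both ranges, while
 * ftl_range_intersect(0, 7, 8, 15) is false.
 */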
#define ftl_range_intersect(s1, e1, s2, e2) \
	((s1) <= (e2) && (s2) <= (e1))

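/*
 * Completion context for admin commands issued during initialization. Call
 * sites declare instances volatile because the submitting thread busy-polls
 * the complete flag while ftl_admin_cb() fills the structure in from the
 * completion path.
 */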
struct ftl_admin_cmpl {
	struct spdk_nvme_cpl			status;

	int					complete;
};

static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct spdk_ftl_conf	g_default_conf = {
	.defrag = {
		.limits = {
			/* 5 free bands  / 0 % host writes */
			[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
			/* 10 free bands / 5 % host writes */
			[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
			/* 20 free bands / 40 % host writes */
			[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
			/* 40 free bands / 100 % host writes - defrag starts running */
			[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
		},
		/* 10 percent valid lbks */
		.invalid_thld = 10,
	},
	/* 20% spare lbks */
	.lba_rsvd = 20,
	/* 6M write buffer */
	.rwb_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/*
	 * Number of interleaving units per ws_opt.
	 * 1 for default and 3 for 3D TLC NAND.
	 */
	.num_interleave_units = 1,
	/*
	 * If clear, FTL will return an error when restoring after a dirty shutdown.
	 * If set, the last band will be padded and FTL will restore based only on
	 * closed bands - this will result in lost data after recovery.
	 */
	.allow_open_bands = false,
};

static void ftl_dev_free_sync(struct spdk_ftl_dev *dev);

static void
ftl_admin_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct ftl_admin_cmpl *cmpl = ctx;

	cmpl->complete = 1;
	cmpl->status = *cpl;
}

static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;

	lba_map->vld = spdk_bit_array_create(ftl_num_band_lbks(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);
	return 0;
}

static int
ftl_check_conf(const struct spdk_ftl_conf *conf,
	       const struct spdk_ocssd_geometry_data *geo)
{
	size_t i;

	if (conf->defrag.invalid_thld >= 100) {
		return -1;
	}
	if (conf->lba_rsvd >= 100) {
		return -1;
	}
	if (conf->lba_rsvd == 0) {
		return -1;
	}
	if (conf->rwb_size == 0) {
		return -1;
	}
	if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
		return -1;
	}
	if (geo->ws_opt % conf->num_interleave_units != 0) {
		return -1;
	}

	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
		if (conf->defrag.limits[i].limit > 100) {
			return -1;
		}
	}

	return 0;
}

static int
ftl_check_init_opts(const struct spdk_ftl_dev_init_opts *opts,
		    const struct spdk_ocssd_geometry_data *geo)
{
	struct spdk_ftl_dev *dev;
	size_t num_punits = geo->num_pu * geo->num_grp;
	int rc = 0;

	if (opts->range.begin > opts->range.end || opts->range.end >= num_punits) {
		return -1;
	}

	if (ftl_check_conf(opts->conf, geo)) {
		return -1;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);

	STAILQ_FOREACH(dev, &g_ftl_queue, stailq) {
		if (spdk_nvme_transport_id_compare(&dev->trid, &opts->trid)) {
			continue;
		}

		if (ftl_range_intersect(opts->range.begin, opts->range.end,
					dev->range.begin, dev->range.end)) {
			rc = -1;
			goto out;
		}
	}

out:
	pthread_mutex_unlock(&g_ftl_queue_lock);
	return rc;
}

int
ftl_retrieve_chunk_info(struct spdk_ftl_dev *dev, struct ftl_ppa ppa,
			struct spdk_ocssd_chunk_information_entry *info,
			unsigned int num_entries)
{
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);
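	/*
	 * The chunk information log page is a flat array of per-chunk entries
	 * ordered by group, then parallel unit, then chunk, so the entry index
	 * for a PPA is (grp * num_pu + pu) * num_chk + chk. For example, with
	 * a hypothetical geometry of num_pu = 8 and num_chk = 1000, the entry
	 * for grp 1, pu 2, chk 3 sits at index (1 * 8 + 2) * 1000 + 3 = 10003.
	 */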
	uint64_t offset = (ppa.grp * dev->geo.num_pu + ppa.pu) *
			  dev->geo.num_chk + ppa.chk;

	if (spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, nsid,
					     info, num_entries * sizeof(*info),
					     offset * sizeof(*info),
					     ftl_admin_cb, (void *)&cmpl)) {
		return -1;
	}

	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	return 0;
}

static int
ftl_retrieve_punit_chunk_info(struct spdk_ftl_dev *dev, const struct ftl_punit *punit,
			      struct spdk_ocssd_chunk_information_entry *info)
{
	uint32_t i = 0;
	unsigned int num_entries = FTL_BLOCK_SIZE / sizeof(*info);
	struct ftl_ppa chunk_ppa = punit->start_ppa;

	for (i = 0; i < dev->geo.num_chk; i += num_entries, chunk_ppa.chk += num_entries) {
		if (num_entries > dev->geo.num_chk - i) {
			num_entries = dev->geo.num_chk - i;
		}

		if (ftl_retrieve_chunk_info(dev, chunk_ppa, &info[i], num_entries)) {
			return -1;
		}
	}

	return 0;
}

static unsigned char
ftl_get_chunk_state(const struct spdk_ocssd_chunk_information_entry *info)
{
	if (info->cs.free) {
		return FTL_CHUNK_STATE_FREE;
	}

	if (info->cs.open) {
		return FTL_CHUNK_STATE_OPEN;
	}

	if (info->cs.closed) {
		return FTL_CHUNK_STATE_CLOSED;
	}

	if (info->cs.offline) {
		return FTL_CHUNK_STATE_BAD;
	}

	assert(0 && "Invalid chunk state");
	return FTL_CHUNK_STATE_BAD;
}

static void
ftl_remove_empty_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;

	/*
	 * Remove the band from the shut_bands list to prevent further
	 * processing if all blocks on this band are bad.
	 */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_chunks) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}

static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct spdk_ocssd_chunk_information_entry	*info;
	struct ftl_band					*band, *pband;
	struct ftl_punit				*punit;
	struct ftl_chunk				*chunk;
	unsigned int					i, j;
	char						buf[128];
	int						rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->num_bands = ftl_dev_num_bands(dev);
	dev->bands = calloc(ftl_dev_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	info = calloc(dev->geo.num_chk, sizeof(*info));
	if (!info) {
		return -1;
	}

	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->chunks);
		band->chunk_buf = calloc(ftl_dev_num_punits(dev), sizeof(*band->chunk_buf));
		if (!band->chunk_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			goto out;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			goto out;
		}
	}

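	/*
	 * Each band is built from one chunk of every parallel unit in the
	 * device's range: punit i contributes chunk_buf[i] of every band, and
	 * a chunk's index within its punit equals the band id. Bad chunks are
	 * skipped, so a band's chunk list may end up shorter than
	 * ftl_dev_num_punits(dev).
	 */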
	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
		punit = &dev->punits[i];

		rc = ftl_retrieve_punit_chunk_info(dev, punit, info);
		if (rc) {
			SPDK_ERRLOG("Failed to retrieve chunk info for @ppa: %s [%lu]\n",
				    ftl_ppa2str(punit->start_ppa, buf, sizeof(buf)),
				    ftl_ppa_addr_pack(dev, punit->start_ppa));
			goto out;
		}

		for (j = 0; j < ftl_dev_num_bands(dev); ++j) {
			band = &dev->bands[j];
			chunk = &band->chunk_buf[i];
			chunk->pos = i;
			chunk->state = ftl_get_chunk_state(&info[j]);
			chunk->punit = punit;
			chunk->start_ppa = punit->start_ppa;
			chunk->start_ppa.chk = band->id;

			if (chunk->state != FTL_CHUNK_STATE_BAD) {
				band->num_chunks++;
				CIRCLEQ_INSERT_TAIL(&band->chunks, chunk, circleq);
			}
		}
	}

	ftl_remove_empty_bands(dev);
out:
	free(info);
	return rc;
}

static int
ftl_dev_init_punits(struct spdk_ftl_dev *dev)
{
	unsigned int i, punit;

	dev->punits = calloc(ftl_dev_num_punits(dev), sizeof(*dev->punits));
	if (!dev->punits) {
		return -1;
	}

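	/*
	 * Global punit indices interleave across groups: index i maps to
	 * group i % num_grp and parallel unit i / num_grp. For example, with
	 * num_grp = 2, punit 5 resolves to grp = 1, pu = 2.
	 */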
	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
		dev->punits[i].dev = dev;
		punit = dev->range.begin + i;

		dev->punits[i].start_ppa.ppa = 0;
		dev->punits[i].start_ppa.grp = punit % dev->geo.num_grp;
		dev->punits[i].start_ppa.pu = punit / dev->geo.num_grp;
	}

	return 0;
}

static int
ftl_dev_retrieve_geo(struct spdk_ftl_dev *dev)
{
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);

	if (spdk_nvme_ocssd_ctrlr_cmd_geometry(dev->ctrlr, nsid, &dev->geo, sizeof(dev->geo),
					       ftl_admin_cb, (void *)&cmpl)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		return -1;
	}

	/* TODO: add a timeout */
	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	/* TODO: add sanity checks for the geo */
	dev->ppa_len = dev->geo.lbaf.grp_len +
		       dev->geo.lbaf.pu_len +
		       dev->geo.lbaf.chk_len +
		       dev->geo.lbaf.lbk_len;

	dev->ppaf.lbk_offset = 0;
	dev->ppaf.lbk_mask   = (1 << dev->geo.lbaf.lbk_len) - 1;
	dev->ppaf.chk_offset = dev->ppaf.lbk_offset + dev->geo.lbaf.lbk_len;
	dev->ppaf.chk_mask   = (1 << dev->geo.lbaf.chk_len) - 1;
	dev->ppaf.pu_offset  = dev->ppaf.chk_offset + dev->geo.lbaf.chk_len;
	dev->ppaf.pu_mask    = (1 << dev->geo.lbaf.pu_len) - 1;
	dev->ppaf.grp_offset = dev->ppaf.pu_offset + dev->geo.lbaf.pu_len;
	dev->ppaf.grp_mask   = (1 << dev->geo.lbaf.grp_len) - 1;
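
	/*
	 * Example of the resulting layout (field widths are device-specific):
	 * with lbk_len = 12, chk_len = 10, pu_len = 6 and grp_len = 2, a
	 * packed address looks like grp[29:28] | pu[27:22] | chk[21:12] |
	 * lbk[11:0], and a field is extracted with, e.g.,
	 * (addr >> dev->ppaf.chk_offset) & dev->ppaf.chk_mask.
	 */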

	/* We're using optimal write size as our xfer size */
	dev->xfer_size = dev->geo.ws_opt;

	return 0;
}

static int
ftl_dev_nvme_init(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	uint32_t block_size;

	dev->ctrlr = opts->ctrlr;

	if (spdk_nvme_ctrlr_get_num_ns(dev->ctrlr) != 1) {
		SPDK_ERRLOG("Unsupported number of namespaces\n");
		return -1;
	}

	dev->ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, FTL_NSID);
	dev->trid = opts->trid;
	dev->md_size = spdk_nvme_ns_get_md_size(dev->ns);

	block_size = spdk_nvme_ns_get_extended_sector_size(dev->ns);
	if (block_size != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
		return -1;
	}

	if (dev->md_size % sizeof(uint32_t) != 0) {
		/* Metadata pointer must be dword aligned */
		SPDK_ERRLOG("Unsupported metadata size (%zu)\n", dev->md_size);
		return -1;
	}

	return 0;
}

static int
ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc)
{
	struct spdk_bdev *bdev;

	if (!bdev_desc) {
		return 0;
	}

	bdev = spdk_bdev_desc_get_bdev(bdev_desc);
	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
		     spdk_bdev_get_name(bdev));

	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
		return -1;
	}

	/* The cache needs to be capable of storing at least two full bands. This requirement comes
	 * from the fact that the cache works as a protection against power loss, so before the data
	 * inside the cache can be overwritten, the band it's stored on has to be closed.
	 */
	if (spdk_bdev_get_num_blocks(bdev) < ftl_num_band_lbks(dev) * 2) {
		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (%"PRIu64")\n",
			    spdk_bdev_get_num_blocks(bdev));
		return -1;
	}

	if (pthread_spin_init(&dev->nv_cache.lock, PTHREAD_PROCESS_PRIVATE)) {
		SPDK_ERRLOG("Failed to initialize cache lock\n");
		return -1;
	}

	dev->nv_cache.bdev_desc = bdev_desc;
	dev->nv_cache.current_addr = 0;
	dev->nv_cache.num_available = spdk_bdev_get_num_blocks(bdev);

	return 0;
}

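/*
 * Usage sketch: callers typically initialize the configuration with defaults
 * and override selected fields before passing it in the init opts, e.g.:
 *
 *	struct spdk_ftl_conf conf;
 *
 *	spdk_ftl_conf_init_defaults(&conf);
 *	conf.lba_rsvd = 10;
 *	opts.conf = &conf;
 */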
void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}

static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_num_band_lbks(dev), FTL_NUM_LBA_IN_BLOCK));
}

static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for the band close / open sequence
	 * alone, plus additional (8) buffers for handling write errors.
	 * TODO: This memory pool is utilized only by the core thread - it introduces
	 * unnecessary overhead and should be replaced by a different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
				sizeof(struct ftl_lba_map_request),
				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
				SPDK_ENV_SOCKET_ID_ANY,
				ftl_lba_map_request_ctor,
				dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}

static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
}

static size_t
ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	size_t seq = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->seq > seq) {
			seq = band->seq;
		}
	}

	return seq;
}

static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/*
	 * Clear the limit applications as they're incremented incorrectly by
	 * the initialization code.
	 */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}

static int
ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	int cnt = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		if (band->num_chunks && !band->lba_map.num_vld) {
			cnt++;
		}
	}
	return cnt;
}

static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/*
	 * TODO: Should we abort initialization or expose a read-only device
	 * if there are no free bands? If we abort initialization, should it
	 * depend on the condition that we have no free bands, or should we
	 * require some minimal number of free bands?
	 */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}

static void
_ftl_dev_init_thread(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;

	thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
	if (!thread->poller) {
		SPDK_ERRLOG("Unable to register poller\n");
		assert(0);
	}

	if (spdk_get_thread() == ftl_get_core_thread(dev)) {
		ftl_anm_register_device(dev, ftl_process_anm_event);
	}
}

static int
ftl_dev_init_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread,
		    struct spdk_thread *spdk_thread, spdk_poller_fn fn, uint64_t period_us)
{
	thread->dev = dev;
	thread->poller_fn = fn;
	thread->thread = spdk_thread;
	thread->period_us = period_us;

	thread->qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0);
	if (!thread->qpair) {
		SPDK_ERRLOG("Unable to initialize qpair\n");
		return -1;
	}

	spdk_thread_send_msg(spdk_thread, _ftl_dev_init_thread, thread);
	return 0;
}

static int
ftl_dev_init_threads(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread || !opts->read_thread) {
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->core_thread, opts->core_thread, ftl_task_core, 0)) {
		SPDK_ERRLOG("Unable to initialize core thread\n");
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->read_thread, opts->read_thread, ftl_task_read, 0)) {
		SPDK_ERRLOG("Unable to initialize read thread\n");
		return -1;
	}

	return 0;
}

static void
ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
{
	assert(thread->poller == NULL);

	spdk_nvme_ctrlr_free_io_qpair(thread->qpair);
	thread->thread = NULL;
	thread->qpair = NULL;
}

static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size;
	uint64_t i;

	if (dev->num_lbas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
		return -1;
	}

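	/*
	 * Entries are 4 bytes when the PPA format is narrower than 32 bits
	 * and 8 bytes otherwise. For example, a device exposing 2^30 LBAs
	 * with a 29-bit PPA format needs a 4 GiB table instead of 8 GiB.
	 */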
	addr_size = dev->ppa_len >= 32 ? 8 : 4;
	dev->l2p = malloc(dev->num_lbas * addr_size);
	if (!dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
		return -1;
	}

	for (i = 0; i < dev->num_lbas; ++i) {
		ftl_l2p_set(dev, i, ftl_to_ppa(FTL_PPA_INVALID));
	}

	return 0;
}

static void
ftl_init_complete(struct spdk_ftl_dev *dev)
{
	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	if (dev->init_cb) {
		dev->init_cb(dev, dev->init_arg, 0);
	}

	dev->init_cb = NULL;
	dev->init_arg = NULL;
}

static int
ftl_setup_initial_state(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_lbks(&dev->bands[i]);
	}

	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	ftl_init_complete(dev);
	return 0;
}

struct ftl_init_fail_ctx {
	spdk_ftl_init_fn	cb;
	void			*arg;
};

static void
ftl_init_fail_cb(void *ctx, int status)
{
	struct ftl_init_fail_ctx *fail_cb = ctx;

	fail_cb->cb(NULL, fail_cb->arg, -ENODEV);
	free(fail_cb);
}

static void
ftl_init_fail(struct spdk_ftl_dev *dev)
{
	struct ftl_init_fail_ctx *fail_cb;

	fail_cb = malloc(sizeof(*fail_cb));
	if (!fail_cb) {
		SPDK_ERRLOG("Unable to allocate context to free the device\n");
		return;
	}

	fail_cb->cb = dev->init_cb;
	fail_cb->arg = dev->init_arg;
	dev->halt_cb = NULL;

	if (spdk_ftl_dev_free(dev, ftl_init_fail_cb, fail_cb)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}

static void
ftl_restore_device_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		goto error;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		goto error;
	}

	ftl_init_complete(dev);
	return;
error:
	ftl_init_fail(dev);
}

static void
ftl_restore_md_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(dev);
}

static int
ftl_restore_state(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	dev->uuid = opts->uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}

static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct ftl_io_channel *ioch = ctx;
	char mempool_name[32];

	snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	ioch->cache_ioch = NULL;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		return -1;
	}

	if (dev->nv_cache.bdev_desc) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			spdk_mempool_free(ioch->io_pool);
			return -1;
		}
	}

	return 0;
}

static void
ftl_io_channel_destroy_cb(void *io_device, void *ctx)
{
	struct ftl_io_channel *ioch = ctx;

	spdk_mempool_free(ioch->io_pool);

	if (ioch->cache_ioch) {
		spdk_put_io_channel(ioch->cache_ioch);
	}
}

static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct ftl_io_channel),
				NULL);

	dev->ioch = spdk_get_io_channel(dev);
	if (!dev->ioch) {
		spdk_io_device_unregister(dev, NULL);
		return -1;
	}

	return 0;
}

int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	TAILQ_INIT(&dev->retry_queue);
	dev->conf = *opts.conf;
	dev->init_cb = cb;
	dev->init_arg = cb_arg;
	dev->range = opts.range;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_nvme_init(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize NVMe structures\n");
		goto fail_sync;
	}

	/*
	 * In case of errors, we free all of the memory in ftl_dev_free_sync(),
	 * so we don't have to clean up in each of the init functions.
	 */
	if (ftl_dev_retrieve_geo(dev)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		goto fail_sync;
	}

	if (ftl_check_init_opts(&opts, &dev->geo)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_dev_init_punits(dev)) {
		SPDK_ERRLOG("Unable to initialize LUNs\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev_desc)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->rwb = ftl_rwb_init(&dev->conf, dev->geo.ws_opt, dev->md_size, ftl_dev_num_punits(dev));
	if (!dev->rwb) {
		SPDK_ERRLOG("Unable to initialize rwb structures\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	if (ftl_dev_init_threads(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize device threads\n");
		goto fail_sync;
	}

	if (opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(dev)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			goto fail_async;
		}
	} else {
		if (ftl_restore_state(dev, &opts)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			goto fail_async;
		}
	}

	return 0;
fail_sync:
	ftl_dev_free_sync(dev);
	return -ENOMEM;
fail_async:
	ftl_init_fail(dev);
	return 0;
}

static void
_ftl_halt_defrag(void *arg)
{
	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
}

static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}

static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	assert(LIST_EMPTY(&dev->wptr_list));

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->ioch) {
		spdk_put_io_channel(dev->ioch);
		spdk_io_device_unregister(dev, NULL);
	}

	if (dev->bands) {
		for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
			free(dev->bands[i].chunk_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
		}
	}

	spdk_mempool_free(dev->lba_pool);
	if (dev->lba_request_pool) {
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_rwb_free(dev->rwb);
	ftl_reloc_free(dev->reloc);

	free(dev->name);
	free(dev->punits);
	free(dev->bands);
	free(dev->l2p);
	free(dev);
}

static int
ftl_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;
	spdk_ftl_fn halt_cb = dev->halt_cb;
	void *halt_arg = dev->halt_arg;

	if (!dev->core_thread.poller && !dev->read_thread.poller) {
		spdk_poller_unregister(&dev->halt_poller);

		ftl_dev_free_thread(dev, &dev->read_thread);
		ftl_dev_free_thread(dev, &dev->core_thread);

		ftl_anm_unregister_device(dev);
		ftl_dev_free_sync(dev);

		if (halt_cb) {
			halt_cb(halt_arg, 0);
		}
	}

	return 0;
}

static void
ftl_add_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	_ftl_halt_defrag(dev);

	assert(!dev->halt_poller);
	dev->halt_poller = spdk_poller_register(ftl_halt_poller, dev, 100);
}

int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_fn cb, void *cb_arg)
{
	if (dev->halt_cb) {
		return -EBUSY;
	}

	dev->halt_cb = cb;
	dev->halt_arg = cb_arg;
	dev->halt = 1;

	ftl_rwb_disable_interleaving(dev->rwb);

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, dev);
	return 0;
}

int
spdk_ftl_module_init(const struct ftl_module_init_opts *opts, spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_init(opts->anm_thread, cb, cb_arg);
}

int
spdk_ftl_module_fini(spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_free(cb, cb_arg);
}

SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)