xref: /spdk/lib/ftl/ftl_init.c (revision d0d19eb82e3ba677162ae5c1930d9ddcf728bcbf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/bdev_module.h"
38 #include "spdk_internal/log.h"
39 #include "spdk/ftl.h"
40 #include "ftl_core.h"
41 #include "ftl_anm.h"
42 #include "ftl_io.h"
43 #include "ftl_reloc.h"
44 #include "ftl_rwb.h"
45 #include "ftl_band.h"
46 #include "ftl_debug.h"
47 
#define FTL_CORE_RING_SIZE	4096
#define FTL_INIT_TIMEOUT	30
/* OCSSD controllers supported here expose a single namespace with ID 1 */
#define FTL_NSID		1

/* True when the closed ranges [s1, e1] and [s2, e2] overlap */
#define ftl_range_intersect(s1, e1, s2, e2) \
	((s1) <= (e2) && (s2) <= (e1))
54 
/* Context used to synchronously wait for an NVMe admin command completion */
struct ftl_admin_cmpl {
	/* Copy of the completion queue entry, inspected after completion */
	struct spdk_nvme_cpl			status;

	/* Set to 1 by ftl_admin_cb once the command has completed */
	int					complete;
};
60 
/* Global list of all registered FTL devices, guarded by g_ftl_queue_lock */
static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
/* Configuration used when the caller does not provide one */
static const struct spdk_ftl_conf	g_default_conf = {
	.defrag = {
		.limits = {
			/* 5 free bands  / 0 % host writes */
			[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
			/* 10 free bands / 5 % host writes */
			[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
			/* 20 free bands / 40 % host writes */
			[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
			/* 40 free bands / 100 % host writes - defrag starts running */
			[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
		},
		/* 10 percent valid lbks */
		.invalid_thld = 10,
	},
	/* 20% spare lbks */
	.lba_rsvd = 20,
	/* 6M write buffer */
	.rwb_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
};
91 
92 static void ftl_dev_free_sync(struct spdk_ftl_dev *dev);
93 
94 static void
95 ftl_admin_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
96 {
97 	struct ftl_admin_cmpl *cmpl = ctx;
98 
99 	cmpl->complete = 1;
100 	cmpl->status = *cpl;
101 }
102 
103 static int
104 ftl_band_init_md(struct ftl_band *band)
105 {
106 	struct ftl_md *md = &band->md;
107 
108 	md->vld_map = spdk_bit_array_create(ftl_num_band_lbks(band->dev));
109 	if (!md->vld_map) {
110 		return -ENOMEM;
111 	}
112 
113 	pthread_spin_init(&md->lock, PTHREAD_PROCESS_PRIVATE);
114 	ftl_band_md_clear(&band->md);
115 	return 0;
116 }
117 
118 static int
119 ftl_check_conf(const struct spdk_ftl_conf *conf)
120 {
121 	size_t i;
122 
123 	if (conf->defrag.invalid_thld >= 100) {
124 		return -1;
125 	}
126 	if (conf->lba_rsvd >= 100) {
127 		return -1;
128 	}
129 	if (conf->lba_rsvd == 0) {
130 		return -1;
131 	}
132 	if (conf->rwb_size == 0) {
133 		return -1;
134 	}
135 	if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
136 		return -1;
137 	}
138 
139 	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
140 		if (conf->defrag.limits[i].limit > 100) {
141 			return -1;
142 		}
143 	}
144 
145 	return 0;
146 }
147 
148 static int
149 ftl_check_init_opts(const struct spdk_ftl_dev_init_opts *opts,
150 		    const struct spdk_ocssd_geometry_data *geo)
151 {
152 	struct spdk_ftl_dev *dev;
153 	size_t num_punits = geo->num_pu * geo->num_grp;
154 	int rc = 0;
155 
156 	if (opts->range.begin > opts->range.end || opts->range.end >= num_punits) {
157 		return -1;
158 	}
159 
160 	if (ftl_check_conf(opts->conf)) {
161 		return -1;
162 	}
163 
164 	pthread_mutex_lock(&g_ftl_queue_lock);
165 
166 	STAILQ_FOREACH(dev, &g_ftl_queue, stailq) {
167 		if (spdk_nvme_transport_id_compare(&dev->trid, &opts->trid)) {
168 			continue;
169 		}
170 
171 		if (ftl_range_intersect(opts->range.begin, opts->range.end,
172 					dev->range.begin, dev->range.end)) {
173 			rc = -1;
174 			goto out;
175 		}
176 	}
177 
178 out:
179 	pthread_mutex_unlock(&g_ftl_queue_lock);
180 	return rc;
181 }
182 
/*
 * Read num_entries chunk information entries starting at entry 'offset' from
 * the OCSSD chunk information log page.  Blocks until the admin command
 * completes.  Returns 0 on success, -1 on submission failure or an error
 * completion status.
 */
static int
ftl_retrieve_bbt_page(struct spdk_ftl_dev *dev, uint64_t offset,
		      struct spdk_ocssd_chunk_information_entry *info,
		      unsigned int num_entries)
{
	/* volatile: written by ftl_admin_cb while we poll on it below */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);

	if (spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, nsid,
					     info, num_entries * sizeof(*info),
					     offset * sizeof(*info),
					     ftl_admin_cb, (void *)&cmpl)) {
		return -1;
	}

	/* Busy-wait for the admin completion; no timeout (see similar TODO in
	 * ftl_dev_retrieve_geo)
	 */
	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	return 0;
}
210 
211 static int
212 ftl_retrieve_bbt(struct spdk_ftl_dev *dev, const struct ftl_punit *punit,
213 		 struct spdk_ocssd_chunk_information_entry *info)
214 {
215 	uint32_t i = 0;
216 	unsigned int num_entries = PAGE_SIZE / sizeof(*info);
217 	uint64_t off = (punit->start_ppa.grp * dev->geo.num_pu + punit->start_ppa.pu) *
218 		       dev->geo.num_chk;
219 
220 	for (i = 0; i < dev->geo.num_chk; i += num_entries) {
221 		if (num_entries > dev->geo.num_chk - i) {
222 			num_entries = dev->geo.num_chk - i;
223 		}
224 
225 		if (ftl_retrieve_bbt_page(dev, off + i, &info[i], num_entries)) {
226 			return -1;
227 		}
228 	}
229 
230 	return 0;
231 }
232 
233 static unsigned char
234 ftl_get_chunk_state(const struct spdk_ocssd_chunk_information_entry *info)
235 {
236 	if (info->cs.free) {
237 		return FTL_CHUNK_STATE_FREE;
238 	}
239 
240 	if (info->cs.open) {
241 		return FTL_CHUNK_STATE_OPEN;
242 	}
243 
244 	if (info->cs.closed) {
245 		return FTL_CHUNK_STATE_CLOSED;
246 	}
247 
248 	if (info->cs.offline) {
249 		return FTL_CHUNK_STATE_BAD;
250 	}
251 
252 	assert(0 && "Invalid block state");
253 	return FTL_CHUNK_STATE_BAD;
254 }
255 
256 static void
257 ftl_remove_empty_bands(struct spdk_ftl_dev *dev)
258 {
259 	struct ftl_band *band, *temp_band;
260 
261 	/* Remove band from shut_bands list to prevent further processing */
262 	/* if all blocks on this band are bad */
263 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
264 		if (!band->num_chunks) {
265 			dev->num_bands--;
266 			LIST_REMOVE(band, list_entry);
267 		}
268 	}
269 }
270 
/*
 * Allocate the band array and populate each band's chunk list from the
 * per-punit chunk information log (bad block table).  Bands left with no
 * usable chunks are removed from further processing.  On failure the
 * partially allocated memory is released later by ftl_dev_free_sync().
 */
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct spdk_ocssd_chunk_information_entry	*info;
	struct ftl_band					*band, *pband;
	struct ftl_punit				*punit;
	struct ftl_chunk				*chunk;
	unsigned int					i, j;
	char						buf[128];
	int						rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->num_bands = ftl_dev_num_bands(dev);
	dev->bands = calloc(ftl_dev_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	/* Scratch buffer for one punit's worth of chunk information entries */
	info = calloc(dev->geo.num_chk, sizeof(*info));
	if (!info) {
		return -1;
	}

	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		/* Insert in index order; pband always holds the previously
		 * inserted band once the list is non-empty */
		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->chunks);
		band->chunk_buf = calloc(ftl_dev_num_punits(dev), sizeof(*band->chunk_buf));
		if (!band->chunk_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			goto out;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			goto out;
		}
	}

	/* Read each punit's chunk states and attach the usable chunks to the
	 * corresponding bands (chunk k of punit i belongs to band k) */
	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
		punit = &dev->punits[i];

		rc = ftl_retrieve_bbt(dev, punit, info);
		if (rc) {
			SPDK_ERRLOG("Failed to retrieve bbt for @ppa: %s [%lu]\n",
				    ftl_ppa2str(punit->start_ppa, buf, sizeof(buf)),
				    ftl_ppa_addr_pack(dev, punit->start_ppa));
			goto out;
		}

		for (j = 0; j < ftl_dev_num_bands(dev); ++j) {
			band = &dev->bands[j];
			chunk = &band->chunk_buf[i];
			chunk->pos = i;
			chunk->state = ftl_get_chunk_state(&info[j]);
			chunk->punit = punit;
			chunk->start_ppa = punit->start_ppa;
			chunk->start_ppa.chk = band->id;

			/* Bad chunks are tracked in chunk_buf but never linked
			 * into the band's active chunk list */
			if (chunk->state != FTL_CHUNK_STATE_BAD) {
				band->num_chunks++;
				CIRCLEQ_INSERT_TAIL(&band->chunks, chunk, circleq);
			}
		}
	}

	ftl_remove_empty_bands(dev);
out:
	free(info);
	return rc;
}
357 
358 static int
359 ftl_dev_init_punits(struct spdk_ftl_dev *dev)
360 {
361 	unsigned int i, punit;
362 
363 	dev->punits = calloc(ftl_dev_num_punits(dev), sizeof(*dev->punits));
364 	if (!dev->punits) {
365 		return -1;
366 	}
367 
368 	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
369 		dev->punits[i].dev = dev;
370 		punit = dev->range.begin + i;
371 
372 		dev->punits[i].start_ppa.ppa = 0;
373 		dev->punits[i].start_ppa.grp = punit % dev->geo.num_grp;
374 		dev->punits[i].start_ppa.pu = punit / dev->geo.num_grp;
375 	}
376 
377 	return 0;
378 }
379 
/*
 * Retrieve the OCSSD geometry from the controller and derive the PPA address
 * format (per-field offsets and masks) plus the transfer size.  Blocks until
 * the admin command completes.  Returns 0 on success, -1 on failure.
 */
static int
ftl_dev_retrieve_geo(struct spdk_ftl_dev *dev)
{
	/* volatile: written by ftl_admin_cb while we poll on it below */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);

	if (spdk_nvme_ocssd_ctrlr_cmd_geometry(dev->ctrlr, nsid, &dev->geo, sizeof(dev->geo),
					       ftl_admin_cb, (void *)&cmpl)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		return -1;
	}

	/* TODO: add a timeout */
	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	/* TODO: add sanity checks for the geo */
	dev->ppa_len = dev->geo.lbaf.grp_len +
		       dev->geo.lbaf.pu_len +
		       dev->geo.lbaf.chk_len +
		       dev->geo.lbaf.lbk_len;

	/* PPA layout (LSB to MSB): lbk | chk | pu | grp */
	dev->ppaf.lbk_offset = 0;
	dev->ppaf.lbk_mask   = (1 << dev->geo.lbaf.lbk_len) - 1;
	dev->ppaf.chk_offset = dev->ppaf.lbk_offset + dev->geo.lbaf.lbk_len;
	dev->ppaf.chk_mask   = (1 << dev->geo.lbaf.chk_len) - 1;
	dev->ppaf.pu_offset  = dev->ppaf.chk_offset + dev->geo.lbaf.chk_len;
	dev->ppaf.pu_mask    = (1 << dev->geo.lbaf.pu_len) - 1;
	dev->ppaf.grp_offset = dev->ppaf.pu_offset + dev->geo.lbaf.pu_len;
	dev->ppaf.grp_mask   = (1 << dev->geo.lbaf.grp_len) - 1;

	/* We're using optimal write size as our xfer size */
	dev->xfer_size = dev->geo.ws_opt;

	return 0;
}
423 
424 static int
425 ftl_dev_nvme_init(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
426 {
427 	uint32_t block_size;
428 
429 	dev->ctrlr = opts->ctrlr;
430 
431 	if (spdk_nvme_ctrlr_get_num_ns(dev->ctrlr) != 1) {
432 		SPDK_ERRLOG("Unsupported number of namespaces\n");
433 		return -1;
434 	}
435 
436 	dev->ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, FTL_NSID);
437 	dev->trid = opts->trid;
438 	dev->md_size = spdk_nvme_ns_get_md_size(dev->ns);
439 
440 	block_size = spdk_nvme_ns_get_extended_sector_size(dev->ns);
441 	if (block_size != FTL_BLOCK_SIZE) {
442 		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
443 		return -1;
444 	}
445 
446 	if (dev->md_size % sizeof(uint32_t) != 0) {
447 		/* Metadata pointer must be dword aligned */
448 		SPDK_ERRLOG("Unsupported metadata size (%zu)\n", dev->md_size);
449 		return -1;
450 	}
451 
452 	return 0;
453 }
454 
455 static int
456 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc)
457 {
458 	struct spdk_bdev *bdev;
459 
460 	if (!bdev_desc) {
461 		return 0;
462 	}
463 
464 	bdev = spdk_bdev_desc_get_bdev(bdev_desc);
465 	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
466 		     spdk_bdev_get_name(bdev));
467 
468 	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
469 		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
470 		return -1;
471 	}
472 
473 	/* The cache needs to be capable of storing at least two full bands. This requirement comes
474 	 * from the fact that cache works as a protection against power loss, so before the data
475 	 * inside the cache can be overwritten, the band it's stored on has to be closed.
476 	 */
477 	if (spdk_bdev_get_num_blocks(bdev) < ftl_num_band_lbks(dev) * 2) {
478 		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache(%"PRIu64"\n",
479 			    spdk_bdev_get_num_blocks(bdev));
480 		return -1;
481 	}
482 
483 	if (pthread_spin_init(&dev->nv_cache.lock, PTHREAD_PROCESS_PRIVATE)) {
484 		SPDK_ERRLOG("Failed to initialize cache lock\n");
485 		return -1;
486 	}
487 
488 	dev->nv_cache.bdev_desc = bdev_desc;
489 	dev->nv_cache.current_addr = 0;
490 	dev->nv_cache.num_available = spdk_bdev_get_num_blocks(bdev);
491 
492 	return 0;
493 }
494 
495 void
496 spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
497 {
498 	*conf = g_default_conf;
499 }
500 
501 static int
502 ftl_init_wptr_list(struct spdk_ftl_dev *dev)
503 {
504 #define POOL_NAME_LEN 128
505 	char pool_name[POOL_NAME_LEN];
506 	int rc;
507 
508 	LIST_INIT(&dev->wptr_list);
509 	LIST_INIT(&dev->flush_list);
510 
511 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lba-pool");
512 	if (rc < 0 || rc >= POOL_NAME_LEN) {
513 		return -ENAMETOOLONG;
514 	}
515 
516 	/* We need to reserve at least 2 buffers for band close / open sequence
517 	 * alone, plus additional (8) buffers for handling write errors.
518 	 * TODO: This memory pool is utilized only by core thread - it introduce
519 	 * unnecessary overhead and should be replaced by different data structure.
520 	 */
521 	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
522 					    ftl_num_band_lbks(dev) * sizeof(uint64_t),
523 					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
524 					    SPDK_ENV_SOCKET_ID_ANY);
525 	if (!dev->lba_pool) {
526 		return -ENOMEM;
527 	}
528 
529 	return 0;
530 }
531 
532 static size_t
533 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
534 {
535 	struct ftl_band *band;
536 	size_t seq = 0;
537 
538 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
539 		if (band->md.seq > seq) {
540 			seq = band->md.seq;
541 		}
542 	}
543 
544 	return seq;
545 }
546 
547 static void
548 _ftl_init_bands_state(void *ctx)
549 {
550 	struct ftl_band *band, *temp_band;
551 	struct spdk_ftl_dev *dev = ctx;
552 
553 	dev->seq = ftl_dev_band_max_seq(dev);
554 
555 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
556 		if (!band->md.num_vld) {
557 			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
558 		}
559 	}
560 
561 	ftl_reloc_resume(dev->reloc);
562 	/* Clear the limit applications as they're incremented incorrectly by */
563 	/* the initialization code */
564 	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
565 }
566 
567 static int
568 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
569 {
570 	struct ftl_band *band;
571 	int cnt = 0;
572 
573 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
574 		if (band->num_chunks && !band->md.num_vld) {
575 			cnt++;
576 		}
577 	}
578 	return cnt;
579 }
580 
581 static int
582 ftl_init_bands_state(struct spdk_ftl_dev *dev)
583 {
584 	/* TODO: Should we abort initialization or expose read only device */
585 	/* if there is no free bands? */
586 	/* If we abort initialization should we depend on condition that */
587 	/* we have no free bands or should we have some minimal number of */
588 	/* free bands? */
589 	if (!ftl_init_num_free_bands(dev)) {
590 		return -1;
591 	}
592 
593 	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
594 	return 0;
595 }
596 
/*
 * Runs on the target SPDK thread: register the FTL poller and, on the core
 * thread only, hook up ANM event processing.
 */
static void
_ftl_dev_init_thread(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;

	thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
	if (!thread->poller) {
		/* Message handlers cannot return errors - treat this as fatal */
		SPDK_ERRLOG("Unable to register poller\n");
		assert(0);
	}

	if (spdk_get_thread() == ftl_get_core_thread(dev)) {
		ftl_anm_register_device(dev, ftl_process_anm_event);
	}
}
613 
614 static int
615 ftl_dev_init_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread,
616 		    struct spdk_thread *spdk_thread, spdk_poller_fn fn, uint64_t period_us)
617 {
618 	thread->dev = dev;
619 	thread->poller_fn = fn;
620 	thread->thread = spdk_thread;
621 	thread->period_us = period_us;
622 
623 	thread->qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0);
624 	if (!thread->qpair) {
625 		SPDK_ERRLOG("Unable to initialize qpair\n");
626 		return -1;
627 	}
628 
629 	spdk_thread_send_msg(spdk_thread, _ftl_dev_init_thread, thread);
630 	return 0;
631 }
632 
633 static int
634 ftl_dev_init_threads(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
635 {
636 	if (!opts->core_thread || !opts->read_thread) {
637 		return -1;
638 	}
639 
640 	if (ftl_dev_init_thread(dev, &dev->core_thread, opts->core_thread, ftl_task_core, 0)) {
641 		SPDK_ERRLOG("Unable to initialize core thread\n");
642 		return -1;
643 	}
644 
645 	if (ftl_dev_init_thread(dev, &dev->read_thread, opts->read_thread, ftl_task_read, 0)) {
646 		SPDK_ERRLOG("Unable to initialize read thread\n");
647 		return -1;
648 	}
649 
650 	return 0;
651 }
652 
653 static void
654 ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
655 {
656 	assert(thread->poller == NULL);
657 
658 	spdk_nvme_ctrlr_free_io_qpair(thread->qpair);
659 	thread->thread = NULL;
660 	thread->qpair = NULL;
661 }
662 
663 static int
664 ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
665 {
666 	size_t addr_size;
667 	uint64_t i;
668 
669 	if (dev->num_lbas == 0) {
670 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
671 		return -1;
672 	}
673 
674 	if (dev->l2p) {
675 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
676 		return -1;
677 	}
678 
679 	addr_size = dev->ppa_len >= 32 ? 8 : 4;
680 	dev->l2p = malloc(dev->num_lbas * addr_size);
681 	if (!dev->l2p) {
682 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
683 		return -1;
684 	}
685 
686 	for (i = 0; i < dev->num_lbas; ++i) {
687 		ftl_l2p_set(dev, i, ftl_to_ppa(FTL_PPA_INVALID));
688 	}
689 
690 	return 0;
691 }
692 
/*
 * Register the fully initialized device on the global list, mark it ready,
 * and invoke the user's init callback exactly once.
 */
static void
ftl_init_complete(struct spdk_ftl_dev *dev)
{
	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	if (dev->init_cb) {
		dev->init_cb(dev, dev->init_arg, 0);
	}

	/* Clear so the callback cannot fire a second time on any later path */
	dev->init_cb = NULL;
	dev->init_arg = NULL;
}
709 
710 static int
711 ftl_setup_initial_state(struct spdk_ftl_dev *dev)
712 {
713 	struct spdk_ftl_conf *conf = &dev->conf;
714 	size_t i;
715 
716 	spdk_uuid_generate(&dev->uuid);
717 
718 	dev->num_lbas = 0;
719 	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
720 		dev->num_lbas += ftl_band_num_usable_lbks(&dev->bands[i]);
721 	}
722 
723 	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
724 
725 	if (ftl_dev_l2p_alloc(dev)) {
726 		SPDK_ERRLOG("Unable to init l2p table\n");
727 		return -1;
728 	}
729 
730 	if (ftl_init_bands_state(dev)) {
731 		SPDK_ERRLOG("Unable to finish the initialization\n");
732 		return -1;
733 	}
734 
735 	ftl_init_complete(dev);
736 	return 0;
737 }
738 
/* Captures the user's init callback so it can be invoked with an error code
 * after the partially initialized device has been torn down.
 */
struct ftl_init_fail_ctx {
	spdk_ftl_init_fn	cb;
	void			*arg;
};
743 
744 static void
745 ftl_init_fail_cb(void *ctx, int status)
746 {
747 	struct ftl_init_fail_ctx *fail_cb = ctx;
748 
749 	fail_cb->cb(NULL, fail_cb->arg, -ENODEV);
750 	free(fail_cb);
751 }
752 
/*
 * Abort an asynchronous initialization: stash the user's init callback and
 * free the device; ftl_init_fail_cb then reports -ENODEV once teardown
 * completes.
 */
static void
ftl_init_fail(struct spdk_ftl_dev *dev)
{
	struct ftl_init_fail_ctx *fail_cb;

	fail_cb = malloc(sizeof(*fail_cb));
	if (!fail_cb) {
		/* NOTE(review): on allocation failure the device is leaked and
		 * the user's init callback is never invoked - worth revisiting */
		SPDK_ERRLOG("Unable to allocate context to free the device\n");
		return;
	}

	fail_cb->cb = dev->init_cb;
	fail_cb->arg = dev->init_arg;
	/* Make sure spdk_ftl_dev_free() doesn't see a stale halt callback */
	dev->halt_cb = NULL;

	if (spdk_ftl_dev_free(dev, ftl_init_fail_cb, fail_cb)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}
773 
/* Completion of data restoration: finish init or tear the device down. */
static void
ftl_restore_device_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(dev);
		return;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(dev);
		return;
	}

	ftl_init_complete(dev);
}
792 
793 static void
794 ftl_restore_md_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
795 {
796 	if (status) {
797 		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
798 		goto error;
799 	}
800 
801 	/* After the metadata is read it should be possible to allocate the L2P */
802 	if (ftl_dev_l2p_alloc(dev)) {
803 		SPDK_ERRLOG("Failed to allocate the L2P\n");
804 		goto error;
805 	}
806 
807 	if (ftl_restore_device(restore, ftl_restore_device_cb)) {
808 		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
809 		goto error;
810 	}
811 
812 	return;
813 error:
814 	ftl_init_fail(dev);
815 }
816 
817 static int
818 ftl_restore_state(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
819 {
820 	dev->uuid = opts->uuid;
821 
822 	if (ftl_restore_md(dev, ftl_restore_md_cb)) {
823 		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
824 		return -1;
825 	}
826 
827 	return 0;
828 }
829 
830 static int
831 ftl_io_channel_create_cb(void *io_device, void *ctx)
832 {
833 	struct spdk_ftl_dev *dev = io_device;
834 	struct ftl_io_channel *ioch = ctx;
835 	char mempool_name[32];
836 
837 	snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
838 	ioch->cache_ioch = NULL;
839 	ioch->elem_size = sizeof(struct ftl_md_io);
840 	ioch->io_pool = spdk_mempool_create(mempool_name,
841 					    dev->conf.user_io_pool_size,
842 					    ioch->elem_size,
843 					    0,
844 					    SPDK_ENV_SOCKET_ID_ANY);
845 	if (!ioch->io_pool) {
846 		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
847 		return -1;
848 	}
849 
850 	if (dev->nv_cache.bdev_desc) {
851 		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
852 		if (!ioch->cache_ioch) {
853 			SPDK_ERRLOG("Failed to create cache IO channel\n");
854 			spdk_mempool_free(ioch->io_pool);
855 			return -1;
856 		}
857 	}
858 
859 	return 0;
860 }
861 
862 static void
863 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
864 {
865 	struct ftl_io_channel *ioch = ctx;
866 
867 	spdk_mempool_free(ioch->io_pool);
868 
869 	if (ioch->cache_ioch) {
870 		spdk_put_io_channel(ioch->cache_ioch);
871 	}
872 }
873 
874 static int
875 ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
876 {
877 	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
878 				sizeof(struct ftl_io_channel),
879 				NULL);
880 
881 	dev->ioch = spdk_get_io_channel(dev);
882 	if (!dev->ioch) {
883 		spdk_io_device_unregister(dev, NULL);
884 		return -1;
885 	}
886 
887 	return 0;
888 }
889 
/*
 * Create and initialize an FTL device on top of an OCSSD controller.  The
 * synchronous part of the initialization runs here; band state setup or
 * state restoration completes asynchronously, after which cb is invoked.
 * Returns 0 when initialization was started (cb reports the final result),
 * or a negative errno when the synchronous part failed.
 */
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	TAILQ_INIT(&dev->retry_queue);
	dev->conf = *opts.conf;
	dev->init_cb = cb;
	dev->init_arg = cb_arg;
	dev->range = opts.range;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_nvme_init(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize NVMe structures\n");
		goto fail_sync;
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
	/* so we don't have to clean up in each of the init functions. */
	if (ftl_dev_retrieve_geo(dev)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		goto fail_sync;
	}

	/* Option validation needs the geometry retrieved above */
	if (ftl_check_init_opts(&opts, &dev->geo)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_dev_init_punits(dev)) {
		SPDK_ERRLOG("Unable to initialize LUNs\n");
		goto fail_sync;
	}

	if (ftl_init_wptr_list(dev)) {
		SPDK_ERRLOG("Unable to init wptr\n");
		goto fail_sync;
	}

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev_desc)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->rwb = ftl_rwb_init(&dev->conf, dev->geo.ws_opt, dev->md_size);
	if (!dev->rwb) {
		SPDK_ERRLOG("Unable to initialize rwb structures\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	if (ftl_dev_init_threads(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize device threads\n");
		goto fail_sync;
	}

	/* Either build a fresh device state (CREATE) or restore it from disk;
	 * both paths finish asynchronously and invoke the init callback */
	if (opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(dev)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			goto fail_async;
		}
	} else {
		if (ftl_restore_state(dev, &opts)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			goto fail_async;
		}
	}

	return 0;
fail_sync:
	/* NOTE(review): -ENOMEM is returned regardless of the actual cause of
	 * the synchronous failure */
	ftl_dev_free_sync(dev);
	return -ENOMEM;
fail_async:
	/* The error is reported through the init callback (-ENODEV) */
	ftl_init_fail(dev);
	return 0;
}
997 
998 static void
999 _ftl_halt_defrag(void *arg)
1000 {
1001 	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1002 }
1003 
/*
 * Synchronously release all device resources.  Safe to call on a partially
 * initialized device (init error paths) - every field is checked before it
 * is freed.  The device must no longer be referenced by any poller.
 */
static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	/* Unlink from the global device list, if it was ever registered */
	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	assert(LIST_EMPTY(&dev->wptr_list));

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->ioch) {
		spdk_put_io_channel(dev->ioch);
		spdk_io_device_unregister(dev, NULL);
	}

	if (dev->bands) {
		for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
			free(dev->bands[i].chunk_buf);
			spdk_bit_array_free(&dev->bands[i].md.vld_map);
		}
	}

	spdk_mempool_free(dev->lba_pool);

	ftl_rwb_free(dev->rwb);
	ftl_reloc_free(dev->reloc);

	free(dev->name);
	free(dev->punits);
	free(dev->bands);
	free(dev->l2p);
	free(dev);
}
1051 
/*
 * Shutdown poller running on the core thread.  Once both FTL pollers have
 * unregistered themselves, release the worker thread qpairs, unregister
 * from ANM, free the device and invoke the user's halt callback.
 */
static int
ftl_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;
	/* Capture the callback before ftl_dev_free_sync() frees dev below */
	spdk_ftl_fn halt_cb = dev->halt_cb;
	void *halt_arg = dev->halt_arg;

	if (!dev->core_thread.poller && !dev->read_thread.poller) {
		spdk_poller_unregister(&dev->halt_poller);

		ftl_dev_free_thread(dev, &dev->read_thread);
		ftl_dev_free_thread(dev, &dev->core_thread);

		ftl_anm_unregister_device(dev);
		ftl_dev_free_sync(dev);

		/* dev is freed at this point - only the local copies are used */
		if (halt_cb) {
			halt_cb(halt_arg, 0);
		}
	}

	return 0;
}
1075 
1076 static void
1077 ftl_add_halt_poller(void *ctx)
1078 {
1079 	struct spdk_ftl_dev *dev = ctx;
1080 
1081 	_ftl_halt_defrag(dev);
1082 
1083 	assert(!dev->halt_poller);
1084 	dev->halt_poller = spdk_poller_register(ftl_halt_poller, dev, 100);
1085 }
1086 
1087 int
1088 spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_fn cb, void *cb_arg)
1089 {
1090 	if (dev->halt_cb) {
1091 		return -EBUSY;
1092 	}
1093 
1094 	dev->halt_cb = cb;
1095 	dev->halt_arg = cb_arg;
1096 	dev->halt = 1;
1097 
1098 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, dev);
1099 	return 0;
1100 }
1101 
/*
 * Initialize the FTL module: start ANM event processing on the given
 * thread.  cb is invoked with cb_arg once initialization completes.
 */
int
spdk_ftl_module_init(const struct ftl_module_init_opts *opts, spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_init(opts->anm_thread, cb, cb_arg);
}
1107 
/*
 * Tear down the FTL module's ANM event processing.  cb is invoked with
 * cb_arg once the teardown completes.
 */
int
spdk_ftl_module_fini(spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_free(cb, cb_arg);
}
1113 
1114 SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)
1115