xref: /spdk/lib/ftl/ftl_init.c (revision a044e19470d20ae1792bedcd820e80d8ab4ad498)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/bdev_module.h"
38 #include "spdk/string.h"
39 #include "spdk/likely.h"
40 #include "spdk_internal/log.h"
41 #include "spdk/ftl.h"
42 #include "ftl_core.h"
43 #include "ftl_anm.h"
44 #include "ftl_io.h"
45 #include "ftl_reloc.h"
46 #include "ftl_rwb.h"
47 #include "ftl_band.h"
48 #include "ftl_debug.h"
49 
50 #define FTL_CORE_RING_SIZE	4096
51 #define FTL_INIT_TIMEOUT	30
52 #define FTL_NSID		1
53 
/* Non-zero when the inclusive ranges [s1, e1] and [s2, e2] have at least one common element */
#define ftl_range_intersect(s1, e1, s2, e2) \
	(!((s1) > (e2) || (s2) > (e1)))
56 
57 struct ftl_admin_cmpl {
58 	struct spdk_nvme_cpl			status;
59 
60 	int					complete;
61 };
62 
63 static STAILQ_HEAD(, spdk_ftl_dev)	g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
64 static pthread_mutex_t			g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
65 static const struct spdk_ftl_conf	g_default_conf = {
66 	.defrag = {
67 		.limits = {
68 			/* 5 free bands  / 0 % host writes */
69 			[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5,  .limit = 0 },
70 			/* 10 free bands / 5 % host writes */
71 			[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
72 			/* 20 free bands / 40 % host writes */
73 			[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
74 			/* 40 free bands / 100 % host writes - defrag starts running */
75 			[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
76 		},
77 		/* 10 percent valid lbks */
78 		.invalid_thld = 10,
79 	},
80 	/* 20% spare lbks */
81 	.lba_rsvd = 20,
82 	/* 6M write buffer */
83 	.rwb_size = 6 * 1024 * 1024,
84 	/* 90% band fill threshold */
85 	.band_thld = 90,
86 	/* Max 32 IO depth per band relocate */
87 	.max_reloc_qdepth = 32,
88 	/* Max 3 active band relocates */
89 	.max_active_relocs = 3,
90 	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
91 	.user_io_pool_size = 2048,
92 	/* Number of interleaving units per ws_opt */
93 	/* 1 for default and 3 for 3D TLC NAND */
94 	.num_interleave_units = 1,
95 	/*
96 	 * If clear ftl will return error when restoring after a dirty shutdown
97 	 * If set, last band will be padded, ftl will restore based only on closed bands - this
98 	 * will result in lost data after recovery.
99 	 */
100 	.allow_open_bands = false,
101 	.nv_cache = {
102 		/* Maximum number of concurrent requests */
103 		.max_request_cnt = 2048,
104 		/* Maximum number of blocks per request */
105 		.max_request_size = 16,
106 	}
107 };
108 
109 static void ftl_dev_free_sync(struct spdk_ftl_dev *dev);
110 
111 static void
112 ftl_admin_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
113 {
114 	struct ftl_admin_cmpl *cmpl = ctx;
115 
116 	cmpl->complete = 1;
117 	cmpl->status = *cpl;
118 }
119 
120 static int
121 ftl_band_init_md(struct ftl_band *band)
122 {
123 	struct ftl_lba_map *lba_map = &band->lba_map;
124 
125 	lba_map->vld = spdk_bit_array_create(ftl_num_band_lbks(band->dev));
126 	if (!lba_map->vld) {
127 		return -ENOMEM;
128 	}
129 
130 	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
131 	ftl_band_md_clear(band);
132 	return 0;
133 }
134 
135 static int
136 ftl_check_conf(const struct spdk_ftl_conf *conf,
137 	       const struct spdk_ocssd_geometry_data *geo)
138 {
139 	size_t i;
140 
141 	if (conf->defrag.invalid_thld >= 100) {
142 		return -1;
143 	}
144 	if (conf->lba_rsvd >= 100) {
145 		return -1;
146 	}
147 	if (conf->lba_rsvd == 0) {
148 		return -1;
149 	}
150 	if (conf->rwb_size == 0) {
151 		return -1;
152 	}
153 	if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
154 		return -1;
155 	}
156 	if (geo->ws_opt % conf->num_interleave_units != 0) {
157 		return -1;
158 	}
159 
160 	for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
161 		if (conf->defrag.limits[i].limit > 100) {
162 			return -1;
163 		}
164 	}
165 
166 	return 0;
167 }
168 
169 static int
170 ftl_check_init_opts(const struct spdk_ftl_dev_init_opts *opts,
171 		    const struct spdk_ocssd_geometry_data *geo)
172 {
173 	struct spdk_ftl_dev *dev;
174 	size_t num_punits = geo->num_pu * geo->num_grp;
175 	int rc = 0;
176 
177 	if (opts->range.begin > opts->range.end || opts->range.end >= num_punits) {
178 		return -1;
179 	}
180 
181 	if (ftl_check_conf(opts->conf, geo)) {
182 		return -1;
183 	}
184 
185 	pthread_mutex_lock(&g_ftl_queue_lock);
186 
187 	STAILQ_FOREACH(dev, &g_ftl_queue, stailq) {
188 		if (spdk_nvme_transport_id_compare(&dev->trid, &opts->trid)) {
189 			continue;
190 		}
191 
192 		if (ftl_range_intersect(opts->range.begin, opts->range.end,
193 					dev->range.begin, dev->range.end)) {
194 			rc = -1;
195 			goto out;
196 		}
197 	}
198 
199 out:
200 	pthread_mutex_unlock(&g_ftl_queue_lock);
201 	return rc;
202 }
203 
204 int
205 ftl_retrieve_chunk_info(struct spdk_ftl_dev *dev, struct ftl_ppa ppa,
206 			struct spdk_ocssd_chunk_information_entry *info,
207 			unsigned int num_entries)
208 {
209 	volatile struct ftl_admin_cmpl cmpl = {};
210 	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);
211 	uint64_t offset = (ppa.grp * dev->geo.num_pu + ppa.pu) *
212 			  dev->geo.num_chk + ppa.chk;
213 	int rc;
214 
215 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, nsid,
216 					      info, num_entries * sizeof(*info),
217 					      offset * sizeof(*info),
218 					      ftl_admin_cb, (void *)&cmpl);
219 	if (spdk_unlikely(rc != 0)) {
220 		SPDK_ERRLOG("spdk_nvme_ctrlr_cmd_get_log_page: %s\n", spdk_strerror(-rc));
221 		return -1;
222 	}
223 
224 	while (!cmpl.complete) {
225 		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
226 	}
227 
228 	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
229 		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
230 			    cmpl.status.status.sc, cmpl.status.status.sct);
231 		return -1;
232 	}
233 
234 	return 0;
235 }
236 
237 static int
238 ftl_retrieve_punit_chunk_info(struct spdk_ftl_dev *dev, const struct ftl_punit *punit,
239 			      struct spdk_ocssd_chunk_information_entry *info)
240 {
241 	uint32_t i = 0;
242 	unsigned int num_entries = FTL_BLOCK_SIZE / sizeof(*info);
243 	struct ftl_ppa chunk_ppa = punit->start_ppa;
244 	char ppa_buf[128];
245 
246 	for (i = 0; i < dev->geo.num_chk; i += num_entries, chunk_ppa.chk += num_entries) {
247 		if (num_entries > dev->geo.num_chk - i) {
248 			num_entries = dev->geo.num_chk - i;
249 		}
250 
251 		if (ftl_retrieve_chunk_info(dev, chunk_ppa, &info[i], num_entries)) {
252 			SPDK_ERRLOG("Failed to retrieve chunk information @ppa: %s\n",
253 				    ftl_ppa2str(chunk_ppa, ppa_buf, sizeof(ppa_buf)));
254 			return -1;
255 		}
256 	}
257 
258 	return 0;
259 }
260 
261 static unsigned char
262 ftl_get_chunk_state(const struct spdk_ocssd_chunk_information_entry *info)
263 {
264 	if (info->cs.free) {
265 		return FTL_CHUNK_STATE_FREE;
266 	}
267 
268 	if (info->cs.open) {
269 		return FTL_CHUNK_STATE_OPEN;
270 	}
271 
272 	if (info->cs.closed) {
273 		return FTL_CHUNK_STATE_CLOSED;
274 	}
275 
276 	if (info->cs.offline) {
277 		return FTL_CHUNK_STATE_BAD;
278 	}
279 
280 	assert(0 && "Invalid block state");
281 	return FTL_CHUNK_STATE_BAD;
282 }
283 
284 static void
285 ftl_remove_empty_bands(struct spdk_ftl_dev *dev)
286 {
287 	struct ftl_band *band, *temp_band;
288 
289 	/* Remove band from shut_bands list to prevent further processing */
290 	/* if all blocks on this band are bad */
291 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
292 		if (!band->num_chunks) {
293 			dev->num_bands--;
294 			LIST_REMOVE(band, list_entry);
295 		}
296 	}
297 }
298 
299 static int
300 ftl_dev_init_bands(struct spdk_ftl_dev *dev)
301 {
302 	struct spdk_ocssd_chunk_information_entry	*info;
303 	struct ftl_band					*band, *pband;
304 	struct ftl_punit				*punit;
305 	struct ftl_chunk				*chunk;
306 	unsigned int					i, j;
307 	char						buf[128];
308 	int						rc = 0;
309 
310 	LIST_INIT(&dev->free_bands);
311 	LIST_INIT(&dev->shut_bands);
312 
313 	dev->num_free = 0;
314 	dev->num_bands = ftl_dev_num_bands(dev);
315 	dev->bands = calloc(ftl_dev_num_bands(dev), sizeof(*dev->bands));
316 	if (!dev->bands) {
317 		return -1;
318 	}
319 
320 	info = calloc(dev->geo.num_chk, sizeof(*info));
321 	if (!info) {
322 		return -1;
323 	}
324 
325 	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
326 		band = &dev->bands[i];
327 		band->id = i;
328 		band->dev = dev;
329 		band->state = FTL_BAND_STATE_CLOSED;
330 
331 		if (LIST_EMPTY(&dev->shut_bands)) {
332 			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
333 		} else {
334 			LIST_INSERT_AFTER(pband, band, list_entry);
335 		}
336 		pband = band;
337 
338 		CIRCLEQ_INIT(&band->chunks);
339 		band->chunk_buf = calloc(ftl_dev_num_punits(dev), sizeof(*band->chunk_buf));
340 		if (!band->chunk_buf) {
341 			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
342 			rc = -1;
343 			goto out;
344 		}
345 
346 		rc = ftl_band_init_md(band);
347 		if (rc) {
348 			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
349 			goto out;
350 		}
351 	}
352 
353 	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
354 		punit = &dev->punits[i];
355 
356 		rc = ftl_retrieve_punit_chunk_info(dev, punit, info);
357 		if (rc) {
358 			SPDK_ERRLOG("Failed to retrieve bbt for @ppa: %s [%lu]\n",
359 				    ftl_ppa2str(punit->start_ppa, buf, sizeof(buf)),
360 				    ftl_ppa_addr_pack(dev, punit->start_ppa));
361 			goto out;
362 		}
363 
364 		for (j = 0; j < ftl_dev_num_bands(dev); ++j) {
365 			band = &dev->bands[j];
366 			chunk = &band->chunk_buf[i];
367 			chunk->pos = i;
368 			chunk->state = ftl_get_chunk_state(&info[j]);
369 			chunk->punit = punit;
370 			chunk->start_ppa = punit->start_ppa;
371 			chunk->start_ppa.chk = band->id;
372 			chunk->write_offset = ftl_dev_lbks_in_chunk(dev);
373 
374 			if (chunk->state != FTL_CHUNK_STATE_BAD) {
375 				band->num_chunks++;
376 				CIRCLEQ_INSERT_TAIL(&band->chunks, chunk, circleq);
377 			}
378 		}
379 	}
380 
381 	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
382 		band = &dev->bands[i];
383 		band->tail_md_ppa = ftl_band_tail_md_ppa(band);
384 	}
385 
386 	ftl_remove_empty_bands(dev);
387 out:
388 	free(info);
389 	return rc;
390 }
391 
392 static int
393 ftl_dev_init_punits(struct spdk_ftl_dev *dev)
394 {
395 	unsigned int i, punit;
396 
397 	dev->punits = calloc(ftl_dev_num_punits(dev), sizeof(*dev->punits));
398 	if (!dev->punits) {
399 		return -1;
400 	}
401 
402 	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
403 		dev->punits[i].dev = dev;
404 		punit = dev->range.begin + i;
405 
406 		dev->punits[i].start_ppa.ppa = 0;
407 		dev->punits[i].start_ppa.grp = punit % dev->geo.num_grp;
408 		dev->punits[i].start_ppa.pu = punit / dev->geo.num_grp;
409 	}
410 
411 	return 0;
412 }
413 
414 static int
415 ftl_dev_retrieve_geo(struct spdk_ftl_dev *dev)
416 {
417 	volatile struct ftl_admin_cmpl cmpl = {};
418 	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);
419 
420 	if (spdk_nvme_ocssd_ctrlr_cmd_geometry(dev->ctrlr, nsid, &dev->geo, sizeof(dev->geo),
421 					       ftl_admin_cb, (void *)&cmpl)) {
422 		SPDK_ERRLOG("Unable to retrieve geometry\n");
423 		return -1;
424 	}
425 
426 	/* TODO: add a timeout */
427 	while (!cmpl.complete) {
428 		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
429 	}
430 
431 	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
432 		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
433 			    cmpl.status.status.sc, cmpl.status.status.sct);
434 		return -1;
435 	}
436 
437 	/* TODO: add sanity checks for the geo */
438 	dev->ppa_len = dev->geo.lbaf.grp_len +
439 		       dev->geo.lbaf.pu_len +
440 		       dev->geo.lbaf.chk_len +
441 		       dev->geo.lbaf.lbk_len;
442 
443 	dev->ppaf.lbk_offset = 0;
444 	dev->ppaf.lbk_mask   = (1 << dev->geo.lbaf.lbk_len) - 1;
445 	dev->ppaf.chk_offset = dev->ppaf.lbk_offset + dev->geo.lbaf.lbk_len;
446 	dev->ppaf.chk_mask   = (1 << dev->geo.lbaf.chk_len) - 1;
447 	dev->ppaf.pu_offset  = dev->ppaf.chk_offset + dev->geo.lbaf.chk_len;
448 	dev->ppaf.pu_mask    = (1 << dev->geo.lbaf.pu_len) - 1;
449 	dev->ppaf.grp_offset = dev->ppaf.pu_offset + dev->geo.lbaf.pu_len;
450 	dev->ppaf.grp_mask   = (1 << dev->geo.lbaf.grp_len) - 1;
451 
452 	/* We're using optimal write size as our xfer size */
453 	dev->xfer_size = dev->geo.ws_opt;
454 
455 	return 0;
456 }
457 
458 static int
459 ftl_dev_nvme_init(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
460 {
461 	uint32_t block_size;
462 
463 	dev->ctrlr = opts->ctrlr;
464 
465 	if (spdk_nvme_ctrlr_get_num_ns(dev->ctrlr) != 1) {
466 		SPDK_ERRLOG("Unsupported number of namespaces\n");
467 		return -1;
468 	}
469 
470 	dev->ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, FTL_NSID);
471 	dev->trid = opts->trid;
472 	dev->md_size = spdk_nvme_ns_get_md_size(dev->ns);
473 
474 	block_size = spdk_nvme_ns_get_extended_sector_size(dev->ns);
475 	if (block_size != FTL_BLOCK_SIZE) {
476 		SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
477 		return -1;
478 	}
479 
480 	if (dev->md_size % sizeof(uint32_t) != 0) {
481 		/* Metadata pointer must be dword aligned */
482 		SPDK_ERRLOG("Unsupported metadata size (%zu)\n", dev->md_size);
483 		return -1;
484 	}
485 
486 	return 0;
487 }
488 
489 static int
490 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc)
491 {
492 	struct spdk_bdev *bdev;
493 	struct spdk_ftl_conf *conf = &dev->conf;
494 	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
495 	char pool_name[128];
496 	int rc;
497 
498 	if (!bdev_desc) {
499 		return 0;
500 	}
501 
502 	bdev = spdk_bdev_desc_get_bdev(bdev_desc);
503 	SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
504 		     spdk_bdev_get_name(bdev));
505 
506 	if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
507 		SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
508 		return -1;
509 	}
510 
511 	if (!spdk_bdev_is_md_separate(bdev)) {
512 		SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
513 			    spdk_bdev_get_name(bdev));
514 		return -1;
515 	}
516 
517 	if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
518 		SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
519 			    spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
520 		return -1;
521 	}
522 
523 	/* The cache needs to be capable of storing at least two full bands. This requirement comes
524 	 * from the fact that cache works as a protection against power loss, so before the data
525 	 * inside the cache can be overwritten, the band it's stored on has to be closed.
526 	 */
527 	if (spdk_bdev_get_num_blocks(bdev) < ftl_num_band_lbks(dev) * 2) {
528 		SPDK_ERRLOG("Insufficient number of blocks for write buffer cache(%"PRIu64"\n",
529 			    spdk_bdev_get_num_blocks(bdev));
530 		return -1;
531 	}
532 
533 	rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
534 	if (rc < 0 || rc >= 128) {
535 		return -1;
536 	}
537 
538 	nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
539 						spdk_bdev_get_md_size(bdev) *
540 						conf->nv_cache.max_request_size,
541 						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
542 						SPDK_ENV_SOCKET_ID_ANY);
543 	if (!nv_cache->md_pool) {
544 		SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
545 		return -1;
546 	}
547 
548 	if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
549 		SPDK_ERRLOG("Failed to initialize cache lock\n");
550 		return -1;
551 	}
552 
553 	nv_cache->bdev_desc = bdev_desc;
554 	nv_cache->current_addr = 0;
555 	nv_cache->num_available = spdk_bdev_get_num_blocks(bdev);
556 
557 	return 0;
558 }
559 
560 void
561 spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
562 {
563 	*conf = g_default_conf;
564 }
565 
566 static void
567 ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
568 {
569 	struct ftl_lba_map_request *request = obj;
570 	struct spdk_ftl_dev *dev = opaque;
571 
572 	request->segments = spdk_bit_array_create(spdk_divide_round_up(
573 				    ftl_num_band_lbks(dev), FTL_NUM_LBA_IN_BLOCK));
574 }
575 
576 static int
577 ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
578 {
579 #define POOL_NAME_LEN 128
580 	char pool_name[POOL_NAME_LEN];
581 	int rc;
582 
583 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lba-pool");
584 	if (rc < 0 || rc >= POOL_NAME_LEN) {
585 		return -ENAMETOOLONG;
586 	}
587 
588 	/* We need to reserve at least 2 buffers for band close / open sequence
589 	 * alone, plus additional (8) buffers for handling write errors.
590 	 * TODO: This memory pool is utilized only by core thread - it introduce
591 	 * unnecessary overhead and should be replaced by different data structure.
592 	 */
593 	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
594 					    ftl_lba_map_pool_elem_size(dev),
595 					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
596 					    SPDK_ENV_SOCKET_ID_ANY);
597 	if (!dev->lba_pool) {
598 		return -ENOMEM;
599 	}
600 
601 	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lbareq-pool");
602 	if (rc < 0 || rc >= POOL_NAME_LEN) {
603 		return -ENAMETOOLONG;
604 	}
605 
606 	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
607 				dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
608 				sizeof(struct ftl_lba_map_request),
609 				SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
610 				SPDK_ENV_SOCKET_ID_ANY,
611 				ftl_lba_map_request_ctor,
612 				dev);
613 	if (!dev->lba_request_pool) {
614 		return -ENOMEM;
615 	}
616 
617 	return 0;
618 }
619 
620 static void
621 ftl_init_wptr_list(struct spdk_ftl_dev *dev)
622 {
623 	LIST_INIT(&dev->wptr_list);
624 	LIST_INIT(&dev->flush_list);
625 }
626 
627 static size_t
628 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
629 {
630 	struct ftl_band *band;
631 	size_t seq = 0;
632 
633 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
634 		if (band->seq > seq) {
635 			seq = band->seq;
636 		}
637 	}
638 
639 	return seq;
640 }
641 
642 static void
643 _ftl_init_bands_state(void *ctx)
644 {
645 	struct ftl_band *band, *temp_band;
646 	struct spdk_ftl_dev *dev = ctx;
647 
648 	dev->seq = ftl_dev_band_max_seq(dev);
649 
650 	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
651 		if (!band->lba_map.num_vld) {
652 			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
653 		}
654 	}
655 
656 	ftl_reloc_resume(dev->reloc);
657 	/* Clear the limit applications as they're incremented incorrectly by */
658 	/* the initialization code */
659 	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
660 }
661 
662 static int
663 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
664 {
665 	struct ftl_band *band;
666 	int cnt = 0;
667 
668 	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
669 		if (band->num_chunks && !band->lba_map.num_vld) {
670 			cnt++;
671 		}
672 	}
673 	return cnt;
674 }
675 
676 static int
677 ftl_init_bands_state(struct spdk_ftl_dev *dev)
678 {
679 	/* TODO: Should we abort initialization or expose read only device */
680 	/* if there is no free bands? */
681 	/* If we abort initialization should we depend on condition that */
682 	/* we have no free bands or should we have some minimal number of */
683 	/* free bands? */
684 	if (!ftl_init_num_free_bands(dev)) {
685 		return -1;
686 	}
687 
688 	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
689 	return 0;
690 }
691 
692 static void
693 _ftl_dev_init_thread(void *ctx)
694 {
695 	struct ftl_thread *thread = ctx;
696 	struct spdk_ftl_dev *dev = thread->dev;
697 
698 	thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
699 	if (!thread->poller) {
700 		SPDK_ERRLOG("Unable to register poller\n");
701 		assert(0);
702 	}
703 
704 	if (spdk_get_thread() == ftl_get_core_thread(dev)) {
705 		ftl_anm_register_device(dev, ftl_process_anm_event);
706 	}
707 }
708 
709 static int
710 ftl_dev_init_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread,
711 		    struct spdk_thread *spdk_thread, spdk_poller_fn fn, uint64_t period_us)
712 {
713 	thread->dev = dev;
714 	thread->poller_fn = fn;
715 	thread->thread = spdk_thread;
716 	thread->period_us = period_us;
717 
718 	thread->qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0);
719 	if (!thread->qpair) {
720 		SPDK_ERRLOG("Unable to initialize qpair\n");
721 		return -1;
722 	}
723 
724 	spdk_thread_send_msg(spdk_thread, _ftl_dev_init_thread, thread);
725 	return 0;
726 }
727 
728 static int
729 ftl_dev_init_threads(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
730 {
731 	if (!opts->core_thread || !opts->read_thread) {
732 		return -1;
733 	}
734 
735 	if (ftl_dev_init_thread(dev, &dev->core_thread, opts->core_thread, ftl_task_core, 0)) {
736 		SPDK_ERRLOG("Unable to initialize core thread\n");
737 		return -1;
738 	}
739 
740 	if (ftl_dev_init_thread(dev, &dev->read_thread, opts->read_thread, ftl_task_read, 0)) {
741 		SPDK_ERRLOG("Unable to initialize read thread\n");
742 		return -1;
743 	}
744 
745 	return 0;
746 }
747 
748 static void
749 ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
750 {
751 	assert(thread->poller == NULL);
752 
753 	spdk_nvme_ctrlr_free_io_qpair(thread->qpair);
754 	thread->thread = NULL;
755 	thread->qpair = NULL;
756 }
757 
758 static int
759 ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
760 {
761 	size_t addr_size;
762 	uint64_t i;
763 
764 	if (dev->num_lbas == 0) {
765 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
766 		return -1;
767 	}
768 
769 	if (dev->l2p) {
770 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
771 		return -1;
772 	}
773 
774 	addr_size = dev->ppa_len >= 32 ? 8 : 4;
775 	dev->l2p = malloc(dev->num_lbas * addr_size);
776 	if (!dev->l2p) {
777 		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
778 		return -1;
779 	}
780 
781 	for (i = 0; i < dev->num_lbas; ++i) {
782 		ftl_l2p_set(dev, i, ftl_to_ppa(FTL_PPA_INVALID));
783 	}
784 
785 	return 0;
786 }
787 
788 static void
789 ftl_init_complete(struct spdk_ftl_dev *dev)
790 {
791 	pthread_mutex_lock(&g_ftl_queue_lock);
792 	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
793 	pthread_mutex_unlock(&g_ftl_queue_lock);
794 
795 	dev->initialized = 1;
796 
797 	if (dev->init_cb) {
798 		dev->init_cb(dev, dev->init_arg, 0);
799 	}
800 
801 	dev->init_cb = NULL;
802 	dev->init_arg = NULL;
803 }
804 
805 static int
806 ftl_setup_initial_state(struct spdk_ftl_dev *dev)
807 {
808 	struct spdk_ftl_conf *conf = &dev->conf;
809 	size_t i;
810 
811 	spdk_uuid_generate(&dev->uuid);
812 
813 	dev->num_lbas = 0;
814 	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
815 		dev->num_lbas += ftl_band_num_usable_lbks(&dev->bands[i]);
816 	}
817 
818 	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
819 
820 	if (ftl_dev_l2p_alloc(dev)) {
821 		SPDK_ERRLOG("Unable to init l2p table\n");
822 		return -1;
823 	}
824 
825 	if (ftl_init_bands_state(dev)) {
826 		SPDK_ERRLOG("Unable to finish the initialization\n");
827 		return -1;
828 	}
829 
830 	ftl_init_complete(dev);
831 	return 0;
832 }
833 
834 struct ftl_init_fail_ctx {
835 	spdk_ftl_init_fn	cb;
836 	void			*arg;
837 };
838 
839 static void
840 ftl_init_fail_cb(void *ctx, int status)
841 {
842 	struct ftl_init_fail_ctx *fail_cb = ctx;
843 
844 	fail_cb->cb(NULL, fail_cb->arg, -ENODEV);
845 	free(fail_cb);
846 }
847 
848 static void
849 ftl_init_fail(struct spdk_ftl_dev *dev)
850 {
851 	struct ftl_init_fail_ctx *fail_cb;
852 
853 	fail_cb = malloc(sizeof(*fail_cb));
854 	if (!fail_cb) {
855 		SPDK_ERRLOG("Unable to allocate context to free the device\n");
856 		return;
857 	}
858 
859 	fail_cb->cb = dev->init_cb;
860 	fail_cb->arg = dev->init_arg;
861 	dev->halt_cb = NULL;
862 
863 	if (spdk_ftl_dev_free(dev, ftl_init_fail_cb, fail_cb)) {
864 		SPDK_ERRLOG("Unable to free the device\n");
865 		assert(0);
866 	}
867 }
868 
/*
 * Completion callback of ftl_restore_device(). On success the band state
 * setup is scheduled and the initialization is completed; any failure goes
 * through ftl_init_fail().
 */
static void
ftl_restore_device_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		ftl_init_fail(dev);
		return;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		ftl_init_fail(dev);
		return;
	}

	ftl_init_complete(dev);
}
887 
888 static void
889 ftl_restore_md_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
890 {
891 	if (status) {
892 		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
893 		goto error;
894 	}
895 
896 	/* After the metadata is read it should be possible to allocate the L2P */
897 	if (ftl_dev_l2p_alloc(dev)) {
898 		SPDK_ERRLOG("Failed to allocate the L2P\n");
899 		goto error;
900 	}
901 
902 	if (ftl_restore_device(restore, ftl_restore_device_cb)) {
903 		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
904 		goto error;
905 	}
906 
907 	return;
908 error:
909 	ftl_init_fail(dev);
910 }
911 
912 static int
913 ftl_restore_state(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
914 {
915 	dev->uuid = opts->uuid;
916 
917 	if (ftl_restore_md(dev, ftl_restore_md_cb)) {
918 		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
919 		return -1;
920 	}
921 
922 	return 0;
923 }
924 
925 static int
926 ftl_io_channel_create_cb(void *io_device, void *ctx)
927 {
928 	struct spdk_ftl_dev *dev = io_device;
929 	struct ftl_io_channel *ioch = ctx;
930 	char mempool_name[32];
931 
932 	snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
933 	ioch->cache_ioch = NULL;
934 	ioch->dev = dev;
935 	ioch->elem_size = sizeof(struct ftl_md_io);
936 	ioch->io_pool = spdk_mempool_create(mempool_name,
937 					    dev->conf.user_io_pool_size,
938 					    ioch->elem_size,
939 					    0,
940 					    SPDK_ENV_SOCKET_ID_ANY);
941 	if (!ioch->io_pool) {
942 		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
943 		return -1;
944 	}
945 
946 	if (dev->nv_cache.bdev_desc) {
947 		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
948 		if (!ioch->cache_ioch) {
949 			SPDK_ERRLOG("Failed to create cache IO channel\n");
950 			spdk_mempool_free(ioch->io_pool);
951 			return -1;
952 		}
953 	}
954 
955 	return 0;
956 }
957 
958 static void
959 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
960 {
961 	struct ftl_io_channel *ioch = ctx;
962 
963 	spdk_mempool_free(ioch->io_pool);
964 
965 	if (ioch->cache_ioch) {
966 		spdk_put_io_channel(ioch->cache_ioch);
967 	}
968 }
969 
970 static int
971 ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
972 {
973 	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
974 				sizeof(struct ftl_io_channel),
975 				NULL);
976 
977 	dev->ioch = spdk_get_io_channel(dev);
978 	if (!dev->ioch) {
979 		spdk_io_device_unregister(dev, NULL);
980 		return -1;
981 	}
982 
983 	return 0;
984 }
985 
986 int
987 spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb, void *cb_arg)
988 {
989 	struct spdk_ftl_dev *dev;
990 	struct spdk_ftl_dev_init_opts opts = *_opts;
991 
992 	dev = calloc(1, sizeof(*dev));
993 	if (!dev) {
994 		return -ENOMEM;
995 	}
996 
997 	if (!opts.conf) {
998 		opts.conf = &g_default_conf;
999 	}
1000 
1001 	TAILQ_INIT(&dev->retry_queue);
1002 	dev->conf = *opts.conf;
1003 	dev->init_cb = cb;
1004 	dev->init_arg = cb_arg;
1005 	dev->range = opts.range;
1006 	dev->limit = SPDK_FTL_LIMIT_MAX;
1007 
1008 	dev->name = strdup(opts.name);
1009 	if (!dev->name) {
1010 		SPDK_ERRLOG("Unable to set device name\n");
1011 		goto fail_sync;
1012 	}
1013 
1014 	if (ftl_dev_nvme_init(dev, &opts)) {
1015 		SPDK_ERRLOG("Unable to initialize NVMe structures\n");
1016 		goto fail_sync;
1017 	}
1018 
1019 	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
1020 	/* so we don't have to clean up in each of the init functions. */
1021 	if (ftl_dev_retrieve_geo(dev)) {
1022 		SPDK_ERRLOG("Unable to retrieve geometry\n");
1023 		goto fail_sync;
1024 	}
1025 
1026 	if (ftl_check_init_opts(&opts, &dev->geo)) {
1027 		SPDK_ERRLOG("Invalid device configuration\n");
1028 		goto fail_sync;
1029 	}
1030 
1031 	if (ftl_dev_init_punits(dev)) {
1032 		SPDK_ERRLOG("Unable to initialize LUNs\n");
1033 		goto fail_sync;
1034 	}
1035 
1036 	if (ftl_init_lba_map_pools(dev)) {
1037 		SPDK_ERRLOG("Unable to init LBA map pools\n");
1038 		goto fail_sync;
1039 	}
1040 
1041 	ftl_init_wptr_list(dev);
1042 
1043 	if (ftl_dev_init_bands(dev)) {
1044 		SPDK_ERRLOG("Unable to initialize band array\n");
1045 		goto fail_sync;
1046 	}
1047 
1048 	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev_desc)) {
1049 		SPDK_ERRLOG("Unable to initialize persistent cache\n");
1050 		goto fail_sync;
1051 	}
1052 
1053 	dev->rwb = ftl_rwb_init(&dev->conf, dev->geo.ws_opt, dev->md_size, ftl_dev_num_punits(dev));
1054 	if (!dev->rwb) {
1055 		SPDK_ERRLOG("Unable to initialize rwb structures\n");
1056 		goto fail_sync;
1057 	}
1058 
1059 	dev->reloc = ftl_reloc_init(dev);
1060 	if (!dev->reloc) {
1061 		SPDK_ERRLOG("Unable to initialize reloc structures\n");
1062 		goto fail_sync;
1063 	}
1064 
1065 	if (ftl_dev_init_io_channel(dev)) {
1066 		SPDK_ERRLOG("Unable to initialize IO channels\n");
1067 		goto fail_sync;
1068 	}
1069 
1070 	if (ftl_dev_init_threads(dev, &opts)) {
1071 		SPDK_ERRLOG("Unable to initialize device threads\n");
1072 		goto fail_sync;
1073 	}
1074 
1075 	if (opts.mode & SPDK_FTL_MODE_CREATE) {
1076 		if (ftl_setup_initial_state(dev)) {
1077 			SPDK_ERRLOG("Failed to setup initial state of the device\n");
1078 			goto fail_async;
1079 		}
1080 	} else {
1081 		if (ftl_restore_state(dev, &opts)) {
1082 			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
1083 			goto fail_async;
1084 		}
1085 	}
1086 
1087 	return 0;
1088 fail_sync:
1089 	ftl_dev_free_sync(dev);
1090 	return -ENOMEM;
1091 fail_async:
1092 	ftl_init_fail(dev);
1093 	return 0;
1094 }
1095 
1096 static void
1097 _ftl_halt_defrag(void *arg)
1098 {
1099 	ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1100 }
1101 
1102 static void
1103 ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
1104 {
1105 	struct ftl_lba_map_request *request = obj;
1106 
1107 	spdk_bit_array_free(&request->segments);
1108 }
1109 
1110 static void
1111 ftl_dev_free_sync(struct spdk_ftl_dev *dev)
1112 {
1113 	struct spdk_ftl_dev *iter;
1114 	size_t i;
1115 
1116 	if (!dev) {
1117 		return;
1118 	}
1119 
1120 	pthread_mutex_lock(&g_ftl_queue_lock);
1121 	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
1122 		if (iter == dev) {
1123 			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
1124 			break;
1125 		}
1126 	}
1127 	pthread_mutex_unlock(&g_ftl_queue_lock);
1128 
1129 	assert(LIST_EMPTY(&dev->wptr_list));
1130 
1131 	ftl_dev_dump_bands(dev);
1132 	ftl_dev_dump_stats(dev);
1133 
1134 	if (dev->ioch) {
1135 		spdk_put_io_channel(dev->ioch);
1136 		spdk_io_device_unregister(dev, NULL);
1137 	}
1138 
1139 	if (dev->bands) {
1140 		for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
1141 			free(dev->bands[i].chunk_buf);
1142 			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
1143 		}
1144 	}
1145 
1146 	spdk_mempool_free(dev->lba_pool);
1147 	spdk_mempool_free(dev->nv_cache.md_pool);
1148 	if (dev->lba_request_pool) {
1149 		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
1150 	}
1151 	spdk_mempool_free(dev->lba_request_pool);
1152 
1153 	ftl_rwb_free(dev->rwb);
1154 	ftl_reloc_free(dev->reloc);
1155 
1156 	free(dev->name);
1157 	free(dev->punits);
1158 	free(dev->bands);
1159 	free(dev->l2p);
1160 	free(dev);
1161 }
1162 
1163 static int
1164 ftl_halt_poller(void *ctx)
1165 {
1166 	struct spdk_ftl_dev *dev = ctx;
1167 	spdk_ftl_fn halt_cb = dev->halt_cb;
1168 	void *halt_arg = dev->halt_arg;
1169 
1170 	if (!dev->core_thread.poller && !dev->read_thread.poller) {
1171 		spdk_poller_unregister(&dev->halt_poller);
1172 
1173 		ftl_dev_free_thread(dev, &dev->read_thread);
1174 		ftl_dev_free_thread(dev, &dev->core_thread);
1175 
1176 		ftl_anm_unregister_device(dev);
1177 		ftl_dev_free_sync(dev);
1178 
1179 		if (halt_cb) {
1180 			halt_cb(halt_arg, 0);
1181 		}
1182 	}
1183 
1184 	return 0;
1185 }
1186 
1187 static void
1188 ftl_add_halt_poller(void *ctx)
1189 {
1190 	struct spdk_ftl_dev *dev = ctx;
1191 
1192 	_ftl_halt_defrag(dev);
1193 
1194 	assert(!dev->halt_poller);
1195 	dev->halt_poller = spdk_poller_register(ftl_halt_poller, dev, 100);
1196 }
1197 
1198 int
1199 spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_fn cb, void *cb_arg)
1200 {
1201 	if (dev->halt_cb) {
1202 		return -EBUSY;
1203 	}
1204 
1205 	dev->halt_cb = cb;
1206 	dev->halt_arg = cb_arg;
1207 	dev->halt = 1;
1208 
1209 	ftl_rwb_disable_interleaving(dev->rwb);
1210 
1211 	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, dev);
1212 	return 0;
1213 }
1214 
1215 int
1216 spdk_ftl_module_init(const struct ftl_module_init_opts *opts, spdk_ftl_fn cb, void *cb_arg)
1217 {
1218 	return ftl_anm_init(opts->anm_thread, cb, cb_arg);
1219 }
1220 
1221 int
1222 spdk_ftl_module_fini(spdk_ftl_fn cb, void *cb_arg)
1223 {
1224 	return ftl_anm_free(cb, cb_arg);
1225 }
1226 
1227 SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)
1228