xref: /spdk/lib/ftl/mngt/ftl_mngt_band.c (revision 8afdeef3becfe9409cc9e7372bd0bc10e8b7d46d)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2022 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "ftl_core.h"
7 #include "ftl_mngt_steps.h"
8 #include "ftl_band.h"
9 #include "ftl_internal.h"
10 
11 static int
12 ftl_band_init_md(struct ftl_band *band)
13 {
14 	struct spdk_ftl_dev *dev = band->dev;
15 	struct ftl_p2l_map *p2l_map = &band->p2l_map;
16 	struct ftl_md *band_info_md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];
17 	struct ftl_md *valid_map_md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_VALID_MAP];
18 	uint64_t band_num_blocks = ftl_get_num_blocks_in_band(band->dev);
19 	size_t band_valid_map_bytes;
20 	struct ftl_band_md *band_md = ftl_md_get_buffer(band_info_md);
21 
22 	if (band_num_blocks % (ftl_bitmap_buffer_alignment * 8)) {
23 		FTL_ERRLOG(dev, "The number of blocks in band is not divisible by bitmap word bits\n");
24 		return -EINVAL;
25 	}
26 	band_valid_map_bytes = band_num_blocks / 8;
27 
28 	p2l_map->valid = ftl_bitmap_create(ftl_md_get_buffer(valid_map_md) +
29 					   band->start_addr / 8, band_valid_map_bytes);
30 	if (!p2l_map->valid) {
31 		return -ENOMEM;
32 	}
33 
34 	band->md = &band_md[band->id];
35 	band->md->version = FTL_BAND_VERSION_CURRENT;
36 	if (!ftl_fast_startup(dev)) {
37 		band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
38 	}
39 
40 	return 0;
41 }
42 
43 static int
44 ftl_dev_init_bands(struct spdk_ftl_dev *dev)
45 {
46 	struct ftl_band *band;
47 	uint64_t i, blocks, md_blocks, md_bands;
48 
49 	/* Calculate initial number of bands */
50 	blocks = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
51 	dev->num_bands = blocks / ftl_get_num_blocks_in_band(dev);
52 
53 	/* Calculate number of bands considering base device metadata size requirement */
54 	md_blocks = ftl_layout_base_md_blocks(dev);
55 	md_bands = spdk_divide_round_up(md_blocks, dev->num_blocks_in_band);
56 
57 	if (dev->num_bands > md_bands) {
58 		/* Save a band worth of space for metadata */
59 		dev->num_bands -= md_bands;
60 	} else {
61 		FTL_ERRLOG(dev, "Base device too small to store metadata\n");
62 		return -1;
63 	}
64 
65 	TAILQ_INIT(&dev->free_bands);
66 	TAILQ_INIT(&dev->shut_bands);
67 
68 	dev->num_free = 0;
69 	dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
70 	if (!dev->bands) {
71 		return -ENOMEM;
72 	}
73 
74 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
75 		band = &dev->bands[i];
76 		band->id = i;
77 		band->dev = dev;
78 
79 		/* Adding to shut_bands is necessary - see ftl_restore_band_close_cb() */
80 		TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);
81 	}
82 
83 	return 0;
84 }
85 
86 static int
87 ftl_dev_init_bands_md(struct spdk_ftl_dev *dev)
88 {
89 	uint64_t i;
90 	int rc = 0;
91 
92 	for (i = 0; i < ftl_get_num_bands(dev); ++i) {
93 		rc = ftl_band_init_md(&dev->bands[i]);
94 		if (rc) {
95 			FTL_ERRLOG(dev, "Failed to initialize metadata structures for band [%lu]\n", i);
96 			break;
97 		}
98 	}
99 
100 	return rc;
101 }
102 
103 static void
104 ftl_dev_deinit_bands(struct spdk_ftl_dev *dev)
105 {
106 	free(dev->bands);
107 }
108 
109 static void
110 ftl_dev_deinit_bands_md(struct spdk_ftl_dev *dev)
111 {
112 	if (dev->bands) {
113 		uint64_t i;
114 		for (i = 0; i < dev->num_bands; ++i) {
115 			struct ftl_band *band = &dev->bands[i];
116 
117 			ftl_bitmap_destroy(band->p2l_map.valid);
118 			band->p2l_map.valid = NULL;
119 
120 			band->md = NULL;
121 		}
122 	}
123 }
124 
/*
 * Management step: set up the band array. The step fails when ftl_dev_init_bands()
 * returns a non-zero code, otherwise the process moves on to the next step.
 */
void
ftl_mngt_init_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	int rc = ftl_dev_init_bands(dev);

	if (rc != 0) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ftl_mngt_next_step(mngt);
}
134 
/*
 * Management step: initialize per-band metadata. The step fails when
 * ftl_dev_init_bands_md() returns a non-zero code, otherwise the process moves on to
 * the next step.
 */
void
ftl_mngt_init_bands_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	int rc = ftl_dev_init_bands_md(dev);

	if (rc != 0) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ftl_mngt_next_step(mngt);
}
144 
/*
 * Management step: free the band array. The helper reports no status, so the process
 * always moves on to the next step.
 */
void
ftl_mngt_deinit_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	/* Release first, then advance */
	ftl_dev_deinit_bands(dev);

	ftl_mngt_next_step(mngt);
}
151 
/*
 * Management step: tear down per-band metadata. The helper reports no status, so the
 * process always moves on to the next step.
 */
void
ftl_mngt_deinit_bands_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	/* Release first, then advance */
	ftl_dev_deinit_bands_md(dev);

	ftl_mngt_next_step(mngt);
}
158 
159 /*
160  * For grouping multiple logical bands (1GiB) to make any IOs more sequential from the drive's
161  * perspective. Improves WAF.
162  */
163 #define BASE_BDEV_RECLAIM_UNIT_SIZE (72 * GiB)
164 
165 static void
166 decorate_bands(struct spdk_ftl_dev *dev)
167 {
168 	struct ftl_band *band;
169 	uint64_t i, num_to_drop, phys_id = 0;
170 	uint64_t num_blocks, num_bands;
171 	uint64_t num_blocks_in_band = ftl_get_num_blocks_in_band(dev);
172 	uint64_t reclaim_unit_num_blocks = BASE_BDEV_RECLAIM_UNIT_SIZE / FTL_BLOCK_SIZE;
173 	uint32_t num_logical_in_phys = 2;
174 
175 	assert(reclaim_unit_num_blocks % num_blocks_in_band == 0);
176 
177 	num_blocks = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
178 
179 	/* For base bdev bigger than 1TB take reclaim uint size for grouping GC bands */
180 	if (num_blocks > (TiB / FTL_BLOCK_SIZE)) {
181 		assert(reclaim_unit_num_blocks < num_blocks);
182 		num_logical_in_phys = reclaim_unit_num_blocks / num_blocks_in_band;
183 	}
184 
185 	num_to_drop = ftl_get_num_bands(dev) % num_logical_in_phys;
186 
187 	i = 0;
188 	while (i < ftl_get_num_bands(dev) - num_to_drop) {
189 		band = &dev->bands[i];
190 
191 		band->phys_id = phys_id;
192 		i++;
193 		if (i % num_logical_in_phys == 0) {
194 			phys_id++;
195 		}
196 	}
197 
198 	/* Mark not aligned logical bands as broken */
199 	num_bands = ftl_get_num_bands(dev);
200 	while (i < num_bands) {
201 		band = &dev->bands[i];
202 		dev->num_bands--;
203 		TAILQ_REMOVE(&dev->shut_bands, band, queue_entry);
204 		i++;
205 	}
206 
207 	dev->num_logical_bands_in_physical = num_logical_in_phys;
208 }
209 
/*
 * Management step: group logical bands into physical ones. The helper reports no
 * status, so the process always moves on to the next step.
 */
void
ftl_mngt_decorate_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	/* Group first, then advance */
	decorate_bands(dev);

	ftl_mngt_next_step(mngt);
}
216 
217 void
218 ftl_mngt_initialize_band_address(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
219 {
220 	struct ftl_band *band;
221 	struct ftl_md *data_md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_DATA_BASE];
222 	uint64_t i;
223 
224 	for (i = 0; i < ftl_get_num_bands(dev); i++) {
225 		band = &dev->bands[i];
226 		band->start_addr = data_md->region->current.offset + i * dev->num_blocks_in_band;
227 		band->tail_md_addr = ftl_band_tail_md_addr(band);
228 	}
229 
230 	ftl_mngt_next_step(mngt);
231 }
232 
233 void
234 ftl_recover_max_seq(struct spdk_ftl_dev *dev)
235 {
236 	struct ftl_band *band;
237 	uint64_t band_close_seq_id = 0, band_open_seq_id = 0;
238 	uint64_t chunk_close_seq_id = 0, chunk_open_seq_id = 0;
239 	uint64_t max = 0;
240 
241 	TAILQ_FOREACH(band, &dev->shut_bands, queue_entry) {
242 		band_open_seq_id = spdk_max(band_open_seq_id, band->md->seq);
243 		band_close_seq_id = spdk_max(band_close_seq_id, band->md->close_seq_id);
244 	}
245 	ftl_nv_cache_get_max_seq_id(&dev->nv_cache, &chunk_open_seq_id, &chunk_close_seq_id);
246 
247 
248 	dev->nv_cache.last_seq_id = chunk_close_seq_id;
249 	dev->writer_gc.last_seq_id = band_close_seq_id;
250 	dev->writer_user.last_seq_id = band_close_seq_id;
251 
252 	max = spdk_max(max, band_open_seq_id);
253 	max = spdk_max(max, band_close_seq_id);
254 	max = spdk_max(max, chunk_open_seq_id);
255 	max = spdk_max(max, chunk_close_seq_id);
256 
257 	dev->sb->seq_id = max;
258 }
259 
260 static int
261 _band_cmp(const void *_a, const void *_b)
262 {
263 	struct ftl_band *a, *b;
264 
265 	a = *((struct ftl_band **)_a);
266 	b = *((struct ftl_band **)_b);
267 
268 	return a->md->seq - b->md->seq;
269 }
270 
271 static struct ftl_band *
272 next_high_prio_band(struct spdk_ftl_dev *dev)
273 {
274 	struct ftl_band *result = NULL, *band;
275 	uint64_t validity = UINT64_MAX;
276 
277 	TAILQ_FOREACH(band, &dev->shut_bands, queue_entry) {
278 		if (band->p2l_map.num_valid < validity) {
279 			result = band;
280 			validity = result->p2l_map.num_valid;
281 		}
282 	}
283 
284 	return result;
285 }
286 
287 static int
288 finalize_init_gc(struct spdk_ftl_dev *dev)
289 {
290 	struct ftl_band *band;
291 	uint64_t free_blocks, blocks_to_move;
292 
293 	ftl_band_init_gc_iter(dev);
294 	dev->sb_shm->gc_info.band_id_high_prio = FTL_BAND_ID_INVALID;
295 
296 	if (0 == dev->num_free) {
297 		/* Get number of available blocks in writer */
298 		free_blocks = ftl_writer_get_free_blocks(&dev->writer_gc);
299 
300 		/*
301 		 * First, check a band candidate to GC
302 		 */
303 		band = ftl_band_search_next_to_reloc(dev);
304 		ftl_bug(NULL == band);
305 		blocks_to_move = band->p2l_map.num_valid;
306 		if (blocks_to_move <= free_blocks) {
307 			/* This GC band can be moved */
308 			return 0;
309 		}
310 
311 		/*
312 		 * The GC candidate cannot be moved because no enough space. We need to find
313 		 * another band.
314 		 */
315 		band = next_high_prio_band(dev);
316 		ftl_bug(NULL == band);
317 
318 		if (band->p2l_map.num_valid > free_blocks) {
319 			FTL_ERRLOG(dev, "CRITICAL ERROR, no more free bands and cannot start\n");
320 			return -1;
321 		} else {
322 			/* GC needs to start using this band */
323 			dev->sb_shm->gc_info.band_id_high_prio = band->id;
324 		}
325 	}
326 
327 	return 0;
328 }
329 
330 static void
331 ftl_property_dump_base_dev(struct spdk_ftl_dev *dev, const struct ftl_property *property,
332 			   struct spdk_json_write_ctx *w)
333 {
334 	uint64_t i;
335 	struct ftl_band *band;
336 
337 	spdk_json_write_named_array_begin(w, "bands");
338 	for (i = 0, band = dev->bands; i < ftl_get_num_bands(dev); i++, band++) {
339 		spdk_json_write_object_begin(w);
340 		spdk_json_write_named_uint64(w, "id", i);
341 		spdk_json_write_named_string(w, "state", ftl_band_get_state_name(band));
342 		spdk_json_write_named_double(w, "validity", 1.0 - ftl_band_invalidity(band));
343 		spdk_json_write_object_end(w);
344 	}
345 	spdk_json_write_array_end(w);
346 }
347 
348 void
349 ftl_mngt_finalize_init_bands(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
350 {
351 	struct ftl_band *band, *temp_band, *open_bands[FTL_MAX_OPEN_BANDS];
352 	struct ftl_writer *writer = NULL;
353 	uint64_t i, num_open = 0, num_shut = 0;
354 	uint64_t offset;
355 	bool fast_startup = ftl_fast_startup(dev);
356 
357 	ftl_recover_max_seq(dev);
358 	ftl_property_register(dev, "base_device", NULL, 0, NULL, NULL, ftl_property_dump_base_dev, NULL,
359 			      NULL, true);
360 
361 	TAILQ_FOREACH_SAFE(band, &dev->free_bands, queue_entry, temp_band) {
362 		band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
363 	}
364 
365 	TAILQ_FOREACH_SAFE(band, &dev->shut_bands, queue_entry, temp_band) {
366 		if (band->md->state == FTL_BAND_STATE_OPEN ||
367 		    band->md->state == FTL_BAND_STATE_FULL) {
368 			TAILQ_REMOVE(&dev->shut_bands, band, queue_entry);
369 			open_bands[num_open++] = band;
370 			assert(num_open <= FTL_MAX_OPEN_BANDS);
371 			continue;
372 		}
373 
374 		if (dev->conf.mode & SPDK_FTL_MODE_CREATE) {
375 			TAILQ_REMOVE(&dev->shut_bands, band, queue_entry);
376 			assert(band->md->state == FTL_BAND_STATE_FREE);
377 			band->md->state = FTL_BAND_STATE_CLOSED;
378 			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
379 		} else {
380 			num_shut++;
381 		}
382 
383 		band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
384 	}
385 
386 	/* Assign open bands to writers and alloc necessary resources */
387 	qsort(open_bands, num_open, sizeof(open_bands[0]), _band_cmp);
388 
389 	for (i = 0; i < num_open; ++i) {
390 		band = open_bands[i];
391 
392 		if (band->md->type == FTL_BAND_TYPE_COMPACTION) {
393 			writer = &dev->writer_user;
394 		} else if (band->md->type == FTL_BAND_TYPE_GC) {
395 			writer = &dev->writer_gc;
396 		} else {
397 			assert(false);
398 		}
399 
400 		if (band->md->state == FTL_BAND_STATE_FULL) {
401 			TAILQ_INSERT_TAIL(&writer->full_bands, band, queue_entry);
402 		} else {
403 			if (writer->band == NULL) {
404 				writer->band = band;
405 			} else {
406 				writer->next_band = band;
407 			}
408 		}
409 
410 		writer->num_bands++;
411 		ftl_band_set_owner(band, ftl_writer_band_state_change, writer);
412 
413 		if (fast_startup) {
414 			FTL_NOTICELOG(dev, "SHM: band open P2L map df_id 0x%"PRIx64"\n", band->md->df_p2l_map);
415 			if (ftl_band_open_p2l_map(band)) {
416 				ftl_mngt_fail_step(mngt);
417 				return;
418 			}
419 
420 			offset = band->md->iter.offset;
421 			ftl_band_iter_init(band);
422 			ftl_band_iter_set(band, offset);
423 			ftl_mngt_p2l_ckpt_restore_shm_clean(band);
424 		} else if (dev->sb->clean) {
425 			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
426 			if (ftl_band_alloc_p2l_map(band)) {
427 				ftl_mngt_fail_step(mngt);
428 				return;
429 			}
430 
431 			offset = band->md->iter.offset;
432 			ftl_band_iter_init(band);
433 			ftl_band_iter_set(band, offset);
434 
435 			if (ftl_mngt_p2l_ckpt_restore_clean(band)) {
436 				ftl_mngt_fail_step(mngt);
437 				return;
438 			}
439 		}
440 	}
441 
442 	if (fast_startup) {
443 		ftl_mempool_initialize_ext(dev->p2l_pool);
444 	}
445 
446 
447 	/* Recalculate number of free bands */
448 	dev->num_free = 0;
449 	TAILQ_FOREACH(band, &dev->free_bands, queue_entry) {
450 		assert(band->md->state == FTL_BAND_STATE_FREE);
451 		dev->num_free++;
452 	}
453 	ftl_apply_limits(dev);
454 
455 	if ((num_shut + num_open + dev->num_free) != ftl_get_num_bands(dev)) {
456 		FTL_ERRLOG(dev, "ERROR, band list inconsistent state\n");
457 		ftl_mngt_fail_step(mngt);
458 		return;
459 	}
460 
461 	if (finalize_init_gc(dev)) {
462 		ftl_mngt_fail_step(mngt);
463 	} else {
464 		ftl_mngt_next_step(mngt);
465 	}
466 }
467