xref: /spdk/module/bdev/gpt/vbdev_gpt.c (revision f8abbede89d30584d2a4f8427b13896f8591b873)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 /*
7  * This driver reads a GPT partition table from a bdev and exposes a virtual block device for
8  * each partition.
9  */
10 
11 #define REGISTER_GUID_DEPRECATION
12 #include "gpt.h"
13 
14 #include "spdk/endian.h"
15 #include "spdk/env.h"
16 #include "spdk/thread.h"
17 #include "spdk/rpc.h"
18 #include "spdk/string.h"
19 #include "spdk/util.h"
20 
21 #include "spdk/bdev_module.h"
22 #include "spdk/log.h"
23 
24 static int vbdev_gpt_init(void);
25 static void vbdev_gpt_examine(struct spdk_bdev *bdev);
26 static int vbdev_gpt_get_ctx_size(void);
27 
28 static struct spdk_bdev_module gpt_if = {
29 	.name = "gpt",
30 	.module_init = vbdev_gpt_init,
31 	.get_ctx_size = vbdev_gpt_get_ctx_size,
32 	.examine_disk = vbdev_gpt_examine,
33 
34 };
35 SPDK_BDEV_MODULE_REGISTER(gpt, &gpt_if)
36 
37 /* Base block device gpt context */
38 struct gpt_base {
39 	struct spdk_gpt			gpt;
40 	struct spdk_bdev_part_base	*part_base;
41 	SPDK_BDEV_PART_TAILQ		parts;
42 
43 	/* This channel is only used for reading the partition table. */
44 	struct spdk_io_channel		*ch;
45 };
46 
47 /* Context for each gpt virtual bdev */
48 struct gpt_disk {
49 	struct spdk_bdev_part	part;
50 	uint32_t		partition_index;
51 };
52 
53 struct gpt_channel {
54 	struct spdk_bdev_part_channel	part_ch;
55 };
56 
57 struct gpt_io {
58 	struct spdk_io_channel *ch;
59 	struct spdk_bdev_io *bdev_io;
60 
61 	/* for bdev_io_wait */
62 	struct spdk_bdev_io_wait_entry bdev_io_wait;
63 };
64 
65 static void
66 gpt_base_free(void *ctx)
67 {
68 	struct gpt_base *gpt_base = ctx;
69 
70 	spdk_free(gpt_base->gpt.buf);
71 	free(gpt_base);
72 }
73 
74 static void
75 gpt_base_bdev_hotremove_cb(void *_part_base)
76 {
77 	struct spdk_bdev_part_base *part_base = _part_base;
78 	struct gpt_base *gpt_base = spdk_bdev_part_base_get_ctx(part_base);
79 
80 	spdk_bdev_part_base_hotremove(part_base, &gpt_base->parts);
81 }
82 
83 static int vbdev_gpt_destruct(void *ctx);
84 static void vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io);
85 static int vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w);
86 static int vbdev_gpt_get_memory_domains(void *ctx, struct spdk_memory_domain **domains,
87 					int array_size);
88 
89 static struct spdk_bdev_fn_table vbdev_gpt_fn_table = {
90 	.destruct		= vbdev_gpt_destruct,
91 	.submit_request		= vbdev_gpt_submit_request,
92 	.dump_info_json		= vbdev_gpt_dump_info_json,
93 	.get_memory_domains	= vbdev_gpt_get_memory_domains,
94 };
95 
96 static struct gpt_base *
97 gpt_base_bdev_init(struct spdk_bdev *bdev)
98 {
99 	struct gpt_base *gpt_base;
100 	struct spdk_gpt *gpt;
101 	int rc;
102 
103 	gpt_base = calloc(1, sizeof(*gpt_base));
104 	if (!gpt_base) {
105 		SPDK_ERRLOG("Cannot alloc memory for gpt_base pointer\n");
106 		return NULL;
107 	}
108 
109 	TAILQ_INIT(&gpt_base->parts);
110 	rc = spdk_bdev_part_base_construct_ext(spdk_bdev_get_name(bdev),
111 					       gpt_base_bdev_hotremove_cb,
112 					       &gpt_if, &vbdev_gpt_fn_table,
113 					       &gpt_base->parts, gpt_base_free, gpt_base,
114 					       sizeof(struct gpt_channel), NULL, NULL, &gpt_base->part_base);
115 	if (rc != 0) {
116 		free(gpt_base);
117 		SPDK_ERRLOG("cannot construct gpt_base");
118 		return NULL;
119 	}
120 
121 	gpt = &gpt_base->gpt;
122 	gpt->parse_phase = SPDK_GPT_PARSE_PHASE_PRIMARY;
123 	gpt->buf_size = spdk_max(SPDK_GPT_BUFFER_SIZE, bdev->blocklen);
124 	gpt->buf = spdk_zmalloc(gpt->buf_size, spdk_bdev_get_buf_align(bdev), NULL,
125 				SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
126 	if (!gpt->buf) {
127 		SPDK_ERRLOG("Cannot alloc buf\n");
128 		spdk_bdev_part_base_free(gpt_base->part_base);
129 		return NULL;
130 	}
131 
132 	gpt->sector_size = bdev->blocklen;
133 	gpt->total_sectors = bdev->blockcnt;
134 	gpt->lba_start = 0;
135 	gpt->lba_end = gpt->total_sectors - 1;
136 
137 	return gpt_base;
138 }
139 
140 static int
141 vbdev_gpt_destruct(void *ctx)
142 {
143 	struct gpt_disk *gpt_disk = ctx;
144 
145 	return spdk_bdev_part_free(&gpt_disk->part);
146 }
147 
148 static void _vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io);
149 
150 static void
151 vbdev_gpt_resubmit_request(void *arg)
152 {
153 	struct gpt_io *io = (struct gpt_io *)arg;
154 
155 	_vbdev_gpt_submit_request(io->ch, io->bdev_io);
156 }
157 
158 static void
159 vbdev_gpt_queue_io(struct gpt_io *io)
160 {
161 	struct gpt_channel *ch = spdk_io_channel_get_ctx(io->ch);
162 	int rc;
163 
164 	io->bdev_io_wait.bdev = io->bdev_io->bdev;
165 	io->bdev_io_wait.cb_fn = vbdev_gpt_resubmit_request;
166 	io->bdev_io_wait.cb_arg = io;
167 
168 	rc = spdk_bdev_queue_io_wait(io->bdev_io->bdev,
169 				     ch->part_ch.base_ch, &io->bdev_io_wait);
170 	if (rc != 0) {
171 		SPDK_ERRLOG("Queue io failed in vbdev_gpt_queue_io, rc=%d.\n", rc);
172 		spdk_bdev_io_complete(io->bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
173 	}
174 }
175 
176 static void
177 vbdev_gpt_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
178 {
179 	if (!success) {
180 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
181 		return;
182 	}
183 
184 	_vbdev_gpt_submit_request(ch, bdev_io);
185 }
186 
187 static void
188 _vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
189 {
190 	struct gpt_channel *ch = spdk_io_channel_get_ctx(_ch);
191 	struct gpt_io *io = (struct gpt_io *)bdev_io->driver_ctx;
192 	int rc;
193 
194 	rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
195 	if (rc) {
196 		if (rc == -ENOMEM) {
197 			SPDK_DEBUGLOG(vbdev_gpt, "gpt: no memory, queue io\n");
198 			io->ch = _ch;
199 			io->bdev_io = bdev_io;
200 			vbdev_gpt_queue_io(io);
201 		} else {
202 			SPDK_ERRLOG("gpt: error on bdev_io submission, rc=%d.\n", rc);
203 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
204 		}
205 	}
206 }
207 
208 static void
209 vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
210 {
211 	switch (bdev_io->type) {
212 	case SPDK_BDEV_IO_TYPE_READ:
213 		spdk_bdev_io_get_buf(bdev_io, vbdev_gpt_get_buf_cb,
214 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
215 		break;
216 	default:
217 		_vbdev_gpt_submit_request(_ch, bdev_io);
218 		break;
219 	}
220 }
221 
222 static void
223 gpt_guid_to_uuid(const struct spdk_gpt_guid *guid, struct spdk_uuid *uuid)
224 {
225 #pragma pack(push, 1)
226 	struct tmp_uuid {
227 		uint32_t b0;
228 		uint16_t b4;
229 		uint16_t b6;
230 		uint16_t b8;
231 		uint16_t b10;
232 		uint32_t b12;
233 	} *ret = (struct tmp_uuid *)uuid;
234 #pragma pack(pop)
235 	SPDK_STATIC_ASSERT(sizeof(*ret) == sizeof(*uuid), "wrong size");
236 
237 	ret->b0 = from_be32(&guid->raw[0]);
238 	ret->b4 = from_be16(&guid->raw[4]);
239 	ret->b6 = from_be16(&guid->raw[6]);
240 	ret->b8 = from_le16(&guid->raw[8]);
241 	ret->b10 = from_le16(&guid->raw[10]);
242 	ret->b12 = from_le32(&guid->raw[12]);
243 }
244 
245 static void
246 write_guid(struct spdk_json_write_ctx *w, const struct spdk_gpt_guid *guid)
247 {
248 	spdk_json_write_string_fmt(w, "%08x-%04x-%04x-%04x-%04x%08x",
249 				   from_le32(&guid->raw[0]),
250 				   from_le16(&guid->raw[4]),
251 				   from_le16(&guid->raw[6]),
252 				   from_be16(&guid->raw[8]),
253 				   from_be16(&guid->raw[10]),
254 				   from_be32(&guid->raw[12]));
255 }
256 
/*
 * Emit a UTF-16LE string as a JSON string. The string ends at the first zero
 * code unit or after max_len code units, whichever comes first.
 */
static void
write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *str, size_t max_len)
{
	size_t count = 0;

	while (count < max_len && str[count] != 0) {
		count++;
	}

	spdk_json_write_string_utf16le_raw(w, str, count);
}
269 
270 static int
271 vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
272 {
273 	struct gpt_disk *gpt_disk = SPDK_CONTAINEROF(ctx, struct gpt_disk, part);
274 	struct spdk_bdev_part_base *base_bdev = spdk_bdev_part_get_base(&gpt_disk->part);
275 	struct gpt_base *gpt_base = spdk_bdev_part_base_get_ctx(base_bdev);
276 	struct spdk_bdev *part_base_bdev = spdk_bdev_part_base_get_bdev(base_bdev);
277 	struct spdk_gpt *gpt = &gpt_base->gpt;
278 	struct spdk_gpt_partition_entry *gpt_entry = &gpt->partitions[gpt_disk->partition_index];
279 	uint64_t offset_blocks = spdk_bdev_part_get_offset_blocks(&gpt_disk->part);
280 
281 	spdk_json_write_named_object_begin(w, "gpt");
282 
283 	spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(part_base_bdev));
284 
285 	spdk_json_write_named_uint64(w, "offset_blocks", offset_blocks);
286 
287 	spdk_json_write_name(w, "partition_type_guid");
288 	write_guid(w, &gpt_entry->part_type_guid);
289 
290 	spdk_json_write_name(w, "unique_partition_guid");
291 	write_guid(w, &gpt_entry->unique_partition_guid);
292 
293 	spdk_json_write_name(w, "partition_name");
294 	write_string_utf16le(w, gpt_entry->partition_name, SPDK_COUNTOF(gpt_entry->partition_name));
295 
296 	spdk_json_write_object_end(w);
297 
298 	return 0;
299 }
300 
301 static int
302 vbdev_gpt_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
303 {
304 	struct gpt_disk *gpt_disk = SPDK_CONTAINEROF(ctx, struct gpt_disk, part);
305 	struct spdk_bdev_part_base *part_base = spdk_bdev_part_get_base(&gpt_disk->part);
306 	struct spdk_bdev *part_base_bdev = spdk_bdev_part_base_get_bdev(part_base);
307 
308 	if (part_base_bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK) {
309 		/* bdev_part remaps reftag and touches metadata buffer, that means it can't support memory domains
310 		 * if dif is enabled */
311 		return 0;
312 	}
313 
314 	return spdk_bdev_get_memory_domains(part_base_bdev, domains, array_size);
315 }
316 
317 static int
318 vbdev_gpt_create_bdevs(struct gpt_base *gpt_base)
319 {
320 	uint32_t num_partition_entries;
321 	uint64_t i, head_lba_start, head_lba_end;
322 	uint32_t num_partitions;
323 	struct spdk_gpt_partition_entry *p;
324 	struct gpt_disk *d;
325 	struct spdk_gpt *gpt;
326 	char *name;
327 	struct spdk_bdev *base_bdev;
328 	int rc;
329 
330 	gpt = &gpt_base->gpt;
331 	num_partition_entries = from_le32(&gpt->header->num_partition_entries);
332 	head_lba_start = from_le64(&gpt->header->first_usable_lba);
333 	head_lba_end = from_le64(&gpt->header->last_usable_lba);
334 	num_partitions = 0;
335 
336 	for (i = 0; i < num_partition_entries; i++) {
337 		p = &gpt->partitions[i];
338 		uint64_t lba_start = from_le64(&p->starting_lba);
339 		uint64_t lba_end = from_le64(&p->ending_lba);
340 		uint64_t partition_size = lba_end - lba_start + 1;
341 		struct spdk_bdev_part_construct_opts opts;
342 
343 		if (lba_start == 0) {
344 			continue;
345 		}
346 
347 		if (SPDK_GPT_GUID_EQUAL(&gpt->partitions[i].part_type_guid,
348 					&SPDK_GPT_PART_TYPE_GUID_OLD)) {
349 			/* GitHub issue #2801 - we continue to report these partitions with
350 			 * off-by-one sizing error to ensure we don't break layouts based
351 			 * on that smaller size. */
352 			partition_size -= 1;
353 		} else if (!SPDK_GPT_GUID_EQUAL(&gpt->partitions[i].part_type_guid,
354 						&SPDK_GPT_PART_TYPE_GUID)) {
355 			/* Partition type isn't TYPE_GUID or TYPE_GUID_OLD, so this isn't
356 			 * an SPDK parition.  Continue to the next partition.
357 			 */
358 			continue;
359 		}
360 
361 		if (lba_start < head_lba_start || lba_end > head_lba_end) {
362 			continue;
363 		}
364 
365 		d = calloc(1, sizeof(*d));
366 		if (!d) {
367 			SPDK_ERRLOG("Memory allocation failure\n");
368 			return -1;
369 		}
370 
371 		/* index start at 1 instead of 0 to match the existing style */
372 		base_bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
373 		name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1);
374 		if (!name) {
375 			SPDK_ERRLOG("name allocation failure\n");
376 			free(d);
377 			return -1;
378 		}
379 
380 		spdk_bdev_part_construct_opts_init(&opts, sizeof(opts));
381 		gpt_guid_to_uuid(&p->unique_partition_guid, &opts.uuid);
382 		rc = spdk_bdev_part_construct_ext(&d->part, gpt_base->part_base, name, lba_start,
383 						  partition_size, "GPT Disk", &opts);
384 		free(name);
385 		if (rc) {
386 			SPDK_ERRLOG("could not construct bdev part\n");
387 			/* spdk_bdev_part_construct will free name on failure */
388 			free(d);
389 			return -1;
390 		}
391 		num_partitions++;
392 		d->partition_index = i;
393 	}
394 
395 	return num_partitions;
396 }
397 
398 static void
399 gpt_read_secondary_table_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
400 {
401 	struct gpt_base *gpt_base = (struct gpt_base *)arg;
402 	struct spdk_bdev *bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
403 	int rc, num_partitions = 0;
404 
405 	spdk_bdev_free_io(bdev_io);
406 	spdk_put_io_channel(gpt_base->ch);
407 	gpt_base->ch = NULL;
408 
409 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
410 		SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
411 			    spdk_bdev_get_name(bdev), status);
412 		goto end;
413 	}
414 
415 	rc = gpt_parse_partition_table(&gpt_base->gpt);
416 	if (rc) {
417 		SPDK_DEBUGLOG(vbdev_gpt, "Failed to parse secondary partition table\n");
418 		goto end;
419 	}
420 
421 	SPDK_WARNLOG("Gpt: bdev=%s primary partition table broken, use the secondary\n",
422 		     spdk_bdev_get_name(bdev));
423 
424 	num_partitions = vbdev_gpt_create_bdevs(gpt_base);
425 	if (num_partitions < 0) {
426 		SPDK_DEBUGLOG(vbdev_gpt, "Failed to split dev=%s by gpt table\n",
427 			      spdk_bdev_get_name(bdev));
428 	}
429 
430 end:
431 	spdk_bdev_module_examine_done(&gpt_if);
432 	if (num_partitions <= 0) {
433 		/* If no gpt_disk instances were created, free the base context */
434 		spdk_bdev_part_base_free(gpt_base->part_base);
435 	}
436 }
437 
438 static int
439 vbdev_gpt_read_secondary_table(struct gpt_base *gpt_base)
440 {
441 	struct spdk_gpt *gpt;
442 	struct spdk_bdev_desc *part_base_desc;
443 	uint64_t secondary_offset;
444 
445 	gpt = &gpt_base->gpt;
446 	gpt->parse_phase = SPDK_GPT_PARSE_PHASE_SECONDARY;
447 	gpt->header = NULL;
448 	gpt->partitions = NULL;
449 
450 	part_base_desc = spdk_bdev_part_base_get_desc(gpt_base->part_base);
451 
452 	secondary_offset = gpt->total_sectors * gpt->sector_size - gpt->buf_size;
453 	return spdk_bdev_read(part_base_desc, gpt_base->ch, gpt_base->gpt.buf, secondary_offset,
454 			      gpt_base->gpt.buf_size, gpt_read_secondary_table_complete,
455 			      gpt_base);
456 }
457 
458 static void
459 gpt_bdev_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
460 {
461 	struct gpt_base *gpt_base = (struct gpt_base *)arg;
462 	struct spdk_bdev *bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
463 	int rc, num_partitions = 0;
464 
465 	spdk_bdev_free_io(bdev_io);
466 
467 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
468 		SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
469 			    spdk_bdev_get_name(bdev), status);
470 		goto end;
471 	}
472 
473 	rc = gpt_parse_mbr(&gpt_base->gpt);
474 	if (rc) {
475 		SPDK_DEBUGLOG(vbdev_gpt, "Failed to parse mbr\n");
476 		goto end;
477 	}
478 
479 	rc = gpt_parse_partition_table(&gpt_base->gpt);
480 	if (rc) {
481 		SPDK_DEBUGLOG(vbdev_gpt, "Failed to parse primary partition table\n");
482 		rc = vbdev_gpt_read_secondary_table(gpt_base);
483 		if (rc) {
484 			SPDK_ERRLOG("Failed to read secondary table\n");
485 			goto end;
486 		}
487 		return;
488 	}
489 
490 	num_partitions = vbdev_gpt_create_bdevs(gpt_base);
491 	if (num_partitions < 0) {
492 		SPDK_DEBUGLOG(vbdev_gpt, "Failed to split dev=%s by gpt table\n",
493 			      spdk_bdev_get_name(bdev));
494 	}
495 
496 end:
497 	spdk_put_io_channel(gpt_base->ch);
498 	gpt_base->ch = NULL;
499 	/*
500 	 * Notify the generic bdev layer that the actions related to the original examine
501 	 *  callback are now completed.
502 	 */
503 	spdk_bdev_module_examine_done(&gpt_if);
504 
505 	/*
506 	 * vbdev_gpt_create_bdevs returns the number of bdevs created upon success.
507 	 * We can branch on this value.
508 	 */
509 	if (num_partitions <= 0) {
510 		/* If no gpt_disk instances were created, free the base context */
511 		spdk_bdev_part_base_free(gpt_base->part_base);
512 	}
513 }
514 
515 static int
516 vbdev_gpt_read_gpt(struct spdk_bdev *bdev)
517 {
518 	struct gpt_base *gpt_base;
519 	struct spdk_bdev_desc *part_base_desc;
520 	int rc;
521 
522 	gpt_base = gpt_base_bdev_init(bdev);
523 	if (!gpt_base) {
524 		SPDK_ERRLOG("Cannot allocated gpt_base\n");
525 		return -1;
526 	}
527 
528 	part_base_desc = spdk_bdev_part_base_get_desc(gpt_base->part_base);
529 	gpt_base->ch = spdk_bdev_get_io_channel(part_base_desc);
530 	if (gpt_base->ch == NULL) {
531 		SPDK_ERRLOG("Failed to get an io_channel.\n");
532 		spdk_bdev_part_base_free(gpt_base->part_base);
533 		return -1;
534 	}
535 
536 	rc = spdk_bdev_read(part_base_desc, gpt_base->ch, gpt_base->gpt.buf, 0,
537 			    gpt_base->gpt.buf_size, gpt_bdev_complete, gpt_base);
538 	if (rc < 0) {
539 		spdk_put_io_channel(gpt_base->ch);
540 		spdk_bdev_part_base_free(gpt_base->part_base);
541 		SPDK_ERRLOG("Failed to send bdev_io command\n");
542 		return -1;
543 	}
544 
545 	return 0;
546 }
547 
/* Module init callback. This module has nothing to set up, so it always returns 0. */
static int
vbdev_gpt_init(void)
{
	const int rc = 0;

	return rc;
}
553 
554 static int
555 vbdev_gpt_get_ctx_size(void)
556 {
557 	return sizeof(struct gpt_io);
558 }
559 
560 static void
561 vbdev_gpt_examine(struct spdk_bdev *bdev)
562 {
563 	int rc;
564 
565 	/* A bdev with fewer than 2 blocks cannot have a GPT. Block 0 has
566 	 * the MBR and block 1 has the GPT header.
567 	 */
568 	if (spdk_bdev_get_num_blocks(bdev) < 2) {
569 		spdk_bdev_module_examine_done(&gpt_if);
570 		return;
571 	}
572 
573 	if (spdk_bdev_get_block_size(bdev) % 512 != 0) {
574 		SPDK_DEBUGLOG(vbdev_gpt,
575 			      "GPT module does not support block size %" PRIu32 " for bdev %s\n",
576 			      spdk_bdev_get_block_size(bdev), spdk_bdev_get_name(bdev));
577 		spdk_bdev_module_examine_done(&gpt_if);
578 		return;
579 	}
580 
581 	rc = vbdev_gpt_read_gpt(bdev);
582 	if (rc) {
583 		spdk_bdev_module_examine_done(&gpt_if);
584 		SPDK_ERRLOG("Failed to read info from bdev %s\n", spdk_bdev_get_name(bdev));
585 	}
586 }
587 
588 SPDK_LOG_REGISTER_COMPONENT(vbdev_gpt)
589