xref: /spdk/lib/bdev/part.c (revision aaba5d9c9e8fca9925d5812030ff3ec9ba869fa3)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2018 Intel Corporation.
3  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
4  *   All rights reserved.
5  */
6 
7 /*
8  * Common code for partition-like virtual bdevs.
9  */
10 
11 #include "spdk/bdev.h"
12 #include "spdk/likely.h"
13 #include "spdk/log.h"
14 #include "spdk/string.h"
15 #include "spdk/thread.h"
16 
17 #include "spdk/bdev_module.h"
18 
/* Shared state for all partitions carved out of one underlying ("base") bdev. */
struct spdk_bdev_part_base {
	struct spdk_bdev		*bdev;		/* the underlying bdev */
	struct spdk_bdev_desc		*desc;		/* open descriptor on bdev */
	uint32_t			ref;		/* number of parts built on this base */
	uint32_t			channel_size;	/* per-channel ctx size for io_device registration */
	spdk_bdev_part_base_free_fn	base_free_fn;	/* user hook invoked when the base is freed */
	void				*ctx;		/* user context handed to base_free_fn */
	bool				claimed;	/* true once module has claimed the base bdev */
	struct spdk_bdev_module		*module;	/* module that owns the partitions */
	struct spdk_bdev_fn_table	*fn_table;	/* fn_table installed into each part's bdev */
	struct bdev_part_tailq		*tailq;		/* list linking all parts on this base */
	spdk_io_channel_create_cb	ch_create_cb;	/* optional user channel-create hook */
	spdk_io_channel_destroy_cb	ch_destroy_cb;	/* optional user channel-destroy hook */
	spdk_bdev_remove_cb_t		remove_cb;	/* invoked on SPDK_BDEV_EVENT_REMOVE */
	struct spdk_thread		*thread;	/* thread on which desc was opened; close must happen there */
};
35 
36 struct spdk_bdev *
37 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
38 {
39 	return part_base->bdev;
40 }
41 
42 struct spdk_bdev_desc *
43 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
44 {
45 	return part_base->desc;
46 }
47 
48 struct bdev_part_tailq *
49 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
50 {
51 	return part_base->tailq;
52 }
53 
54 void *
55 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
56 {
57 	return part_base->ctx;
58 }
59 
60 const char *
61 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
62 {
63 	return part_base->bdev->name;
64 }
65 
/* Thread-message trampoline: close a bdev descriptor on its opening thread. */
static void
bdev_part_base_free(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
73 
74 void
75 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
76 {
77 	if (base->desc) {
78 		/* Close the underlying bdev on its same opened thread. */
79 		if (base->thread && base->thread != spdk_get_thread()) {
80 			spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
81 		} else {
82 			spdk_bdev_close(base->desc);
83 		}
84 	}
85 
86 	if (base->base_free_fn != NULL) {
87 		base->base_free_fn(base->ctx);
88 	}
89 
90 	free(base);
91 }
92 
/*
 * io_device unregister callback: final teardown of one part.
 * Removes the part from the base's list, drops the base reference (releasing
 * and freeing the base when it hits zero), signals destruct completion to the
 * bdev layer, and frees the part's owned strings and the part itself.
 */
static void
bdev_part_free_cb(void *io_device)
{
	struct spdk_bdev_part *part = io_device;
	struct spdk_bdev_part_base *base;

	assert(part);
	assert(part->internal.base);

	base = part->internal.base;

	TAILQ_REMOVE(base->tailq, part, tailq);

	/* Last part on this base: give back the module claim and free the base. */
	if (--base->ref == 0) {
		spdk_bdev_module_release_bdev(base->bdev);
		spdk_bdev_part_base_free(base);
	}

	/* Complete the asynchronous destruct started in spdk_bdev_part_free(). */
	spdk_bdev_destruct_done(&part->internal.bdev, 0);
	free(part->internal.bdev.name);
	free(part->internal.bdev.product_name);
	free(part);
}
116 
117 int
118 spdk_bdev_part_free(struct spdk_bdev_part *part)
119 {
120 	spdk_io_device_unregister(part, bdev_part_free_cb);
121 
122 	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
123 	 * until spdk_bdev_destruct_done is called */
124 	return 1;
125 }
126 
127 void
128 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
129 {
130 	struct spdk_bdev_part *part, *tmp;
131 
132 	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
133 		if (part->internal.base == part_base) {
134 			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
135 		}
136 	}
137 }
138 
139 static bool
140 bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
141 {
142 	struct spdk_bdev_part *part = _part;
143 
144 	/* We can't decode/modify passthrough NVMe commands, so don't report
145 	 *  that a partition supports these io types, even if the underlying
146 	 *  bdev does.
147 	 */
148 	switch (io_type) {
149 	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
150 	case SPDK_BDEV_IO_TYPE_NVME_IO:
151 	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
152 		return false;
153 	default:
154 		break;
155 	}
156 
157 	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
158 			io_type);
159 }
160 
/* fn_table get_io_channel for parts: the part itself is the io_device. */
static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	void *io_device = _part;

	return spdk_get_io_channel(io_device);
}
168 
169 struct spdk_bdev *
170 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
171 {
172 	return &part->internal.bdev;
173 }
174 
175 struct spdk_bdev_part_base *
176 spdk_bdev_part_get_base(struct spdk_bdev_part *part)
177 {
178 	return part->internal.base;
179 }
180 
181 struct spdk_bdev *
182 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
183 {
184 	return part->internal.base->bdev;
185 }
186 
187 uint64_t
188 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
189 {
190 	return part->internal.offset_blocks;
191 }
192 
/*
 * Remap DIF reference tags in a bdev_io's buffers from the part-relative
 * starting block ("offset") to the base-bdev-relative starting block
 * ("remapped_offset"). No-op when the base bdev is not checking reference
 * tags. Returns 0 on success or the negative errno from the DIF library.
 */
static int
bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
		    uint32_t remapped_offset)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	/* Fast path: reference tags are not checked, nothing to remap. */
	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
		return 0;
	}

	/* Build a DIF context seeded with the original (part-relative) init
	 * reference tag; format parameters come from the base bdev. */
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       offset, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return rc;
	}

	/* Target reference tag after the partition offset adjustment. */
	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);

	if (bdev->md_interleave) {
		/* Metadata interleaved with data: remap in the data iovecs. */
		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		/* Separate metadata buffer (DIX layout): remap there. */
		struct iovec md_iov = {
			.iov_base	= bdev_io->u.bdev.md_buf,
			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}
236 
/*
 * Completion callback for I/O submitted to the base bdev on behalf of a part.
 * bdev_io is the base-bdev I/O; cb_arg (part_io) is the original part-level
 * I/O. Performs per-type post-processing (DIF ref-tag remap for reads, buffer
 * hand-off for zcopy), then either invokes the user's stored completion
 * callback or completes the part I/O directly, and finally frees the base I/O.
 */
static void
bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	uint32_t offset, remapped_offset;
	spdk_bdev_io_completion_cb cb;
	int rc, status;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (success) {
			/* Remap ref tags from base-relative back to part-relative. */
			offset = bdev_io->u.bdev.offset_blocks;
			remapped_offset = part_io->u.bdev.offset_blocks;

			rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
			if (rc != 0) {
				success = false;
			}
		}
		break;
	case SPDK_BDEV_IO_TYPE_ZCOPY:
		/* Hand the base bdev's zero-copy buffer up to the part I/O. */
		spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base,
				     bdev_io->u.bdev.iovs[0].iov_len);
		break;
	default:
		break;
	}


	/* Prefer the caller-supplied completion (stored at submit time);
	 * otherwise complete the part I/O with a plain status. */
	cb = part_io->u.bdev.stored_user_cb;
	if (cb != NULL) {
		cb(part_io, success, NULL);
	} else {
		status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

		spdk_bdev_io_complete(part_io, status);
	}

	spdk_bdev_free_io(bdev_io);
}
277 
278 static inline void
279 bdev_part_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
280 {
281 	memset(opts, 0, sizeof(*opts));
282 	opts->size = sizeof(*opts);
283 	opts->memory_domain = bdev_io->u.bdev.memory_domain;
284 	opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
285 	opts->metadata = bdev_io->u.bdev.md_buf;
286 }
287 
/*
 * Submit a part-level I/O to the base bdev, translating block offsets by the
 * part's starting offset. If cb is non-NULL it is stored on the part I/O and
 * invoked at completion instead of completing the I/O directly.
 *
 * Returns the submit rc from the base-bdev call (0 on successful submission),
 * or SPDK_BDEV_IO_STATUS_FAILED for unsupported types / DIF remap failure.
 */
int
spdk_bdev_part_submit_request_ext(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io,
				  spdk_bdev_io_completion_cb cb)
{
	struct spdk_bdev_part *part = ch->part;
	struct spdk_io_channel *base_ch = ch->base_ch;
	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
	struct spdk_bdev_ext_io_opts io_opts;
	uint64_t offset, remapped_offset, remapped_src_offset;
	int rc = 0;

	/* Saved for bdev_part_complete_io() to call on completion. */
	bdev_io->u.bdev.stored_user_cb = cb;

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = offset + part->internal.offset_blocks;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_part_init_ext_io_opts(bdev_io, &io_opts);
		rc = spdk_bdev_readv_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
						bdev_io->u.bdev.iovcnt, remapped_offset,
						bdev_io->u.bdev.num_blocks,
						bdev_part_complete_io, bdev_io, &io_opts);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* Writes must carry base-relative DIF reference tags. */
		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
		if (rc != 0) {
			return SPDK_BDEV_IO_STATUS_FAILED;
		}
		bdev_part_init_ext_io_opts(bdev_io, &io_opts);
		rc = spdk_bdev_writev_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
						 bdev_io->u.bdev.iovcnt, remapped_offset,
						 bdev_io->u.bdev.num_blocks,
						 bdev_part_complete_io, bdev_io, &io_opts);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
						   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
						   bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		/* Reset is not offset-based; pass straight through. */
		rc = spdk_bdev_reset(base_desc, base_ch,
				     bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZCOPY:
		rc = spdk_bdev_zcopy_start(base_desc, base_ch, NULL, 0, remapped_offset,
					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
					   bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE:
		if (!bdev_io->u.bdev.md_buf) {
			rc = spdk_bdev_comparev_blocks(base_desc, base_ch,
						       bdev_io->u.bdev.iovs,
						       bdev_io->u.bdev.iovcnt,
						       remapped_offset,
						       bdev_io->u.bdev.num_blocks,
						       bdev_part_complete_io, bdev_io);
		} else {
			rc = spdk_bdev_comparev_blocks_with_md(base_desc, base_ch,
							       bdev_io->u.bdev.iovs,
							       bdev_io->u.bdev.iovcnt,
							       bdev_io->u.bdev.md_buf,
							       remapped_offset,
							       bdev_io->u.bdev.num_blocks,
							       bdev_part_complete_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		rc = spdk_bdev_comparev_and_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.fused_iovs,
				bdev_io->u.bdev.fused_iovcnt,
				remapped_offset,
				bdev_io->u.bdev.num_blocks,
				bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		/* Copy carries a second (source) offset that must also be remapped. */
		remapped_src_offset = bdev_io->u.bdev.copy.src_offset_blocks + part->internal.offset_blocks;
		rc = spdk_bdev_copy_blocks(base_desc, base_ch, remapped_offset, remapped_src_offset,
					   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					   bdev_io);
		break;
	default:
		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
		return SPDK_BDEV_IO_STATUS_FAILED;
	}

	return rc;
}
388 
389 int
390 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
391 {
392 	return spdk_bdev_part_submit_request_ext(ch, bdev_io, NULL);
393 }
394 
395 static int
396 bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
397 {
398 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
399 	struct spdk_bdev_part_channel *ch = ctx_buf;
400 
401 	ch->part = part;
402 	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
403 	if (ch->base_ch == NULL) {
404 		return -1;
405 	}
406 
407 	if (part->internal.base->ch_create_cb) {
408 		return part->internal.base->ch_create_cb(io_device, ctx_buf);
409 	} else {
410 		return 0;
411 	}
412 }
413 
414 static void
415 bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
416 {
417 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
418 	struct spdk_bdev_part_channel *ch = ctx_buf;
419 
420 	if (part->internal.base->ch_destroy_cb) {
421 		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
422 	}
423 	spdk_put_io_channel(ch->base_ch);
424 }
425 
426 static void
427 bdev_part_base_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
428 			void *event_ctx)
429 {
430 	struct spdk_bdev_part_base *base = event_ctx;
431 
432 	switch (type) {
433 	case SPDK_BDEV_EVENT_REMOVE:
434 		base->remove_cb(base);
435 		break;
436 	default:
437 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
438 		break;
439 	}
440 }
441 
/*
 * Allocate and initialize a part base on top of the bdev named bdev_name.
 * Installs the common get_io_channel/io_type_supported handlers into the
 * caller-provided fn_table (note: fn_table is mutated), opens the base bdev
 * read-only here (it is claimed for writing later, in
 * spdk_bdev_part_construct), and records the opening thread so the
 * descriptor can be closed there.
 *
 * Returns 0 and sets *_base on success; negative errno on failure
 * (-EINVAL for NULL _base, -ENOMEM on allocation failure, or the
 * spdk_bdev_open_ext() error).
 */
int
spdk_bdev_part_base_construct_ext(const char *bdev_name,
				  spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
				  struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
				  spdk_bdev_part_base_free_fn free_fn, void *ctx,
				  uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
				  spdk_io_channel_destroy_cb ch_destroy_cb,
				  struct spdk_bdev_part_base **_base)
{
	int rc;
	struct spdk_bdev_part_base *base;

	if (_base == NULL) {
		return -EINVAL;
	}

	base = calloc(1, sizeof(*base));
	if (!base) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -ENOMEM;
	}
	/* All parts share these two handlers regardless of the owning module. */
	fn_table->get_io_channel = bdev_part_get_io_channel;
	fn_table->io_type_supported = bdev_part_io_type_supported;

	base->desc = NULL;
	base->ref = 0;
	base->module = module;
	base->fn_table = fn_table;
	base->tailq = tailq;
	base->base_free_fn = free_fn;
	base->ctx = ctx;
	base->claimed = false;
	base->channel_size = channel_size;
	base->ch_create_cb = ch_create_cb;
	base->ch_destroy_cb = ch_destroy_cb;
	base->remove_cb = remove_cb;

	rc = spdk_bdev_open_ext(bdev_name, false, bdev_part_base_event_cb, base, &base->desc);
	if (rc) {
		if (rc == -ENODEV) {
			/* Bdev doesn't exist (yet): quiet failure, no user free hook. */
			free(base);
		} else {
			SPDK_ERRLOG("could not open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
			/* Full free path also runs the user's base_free_fn. */
			spdk_bdev_part_base_free(base);
		}
		return rc;
	}

	base->bdev = spdk_bdev_desc_get_bdev(base->desc);

	/* Save the thread where the base device is opened */
	base->thread = spdk_get_thread();

	*_base = base;

	return 0;
}
499 
500 int
501 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
502 			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
503 			 char *product_name)
504 {
505 	int rc;
506 	bool first_claimed = false;
507 
508 	part->internal.bdev.blocklen = base->bdev->blocklen;
509 	part->internal.bdev.blockcnt = num_blocks;
510 	part->internal.offset_blocks = offset_blocks;
511 
512 	part->internal.bdev.write_cache = base->bdev->write_cache;
513 	part->internal.bdev.required_alignment = base->bdev->required_alignment;
514 	part->internal.bdev.ctxt = part;
515 	part->internal.bdev.module = base->module;
516 	part->internal.bdev.fn_table = base->fn_table;
517 
518 	part->internal.bdev.md_interleave = base->bdev->md_interleave;
519 	part->internal.bdev.md_len = base->bdev->md_len;
520 	part->internal.bdev.dif_type = base->bdev->dif_type;
521 	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
522 	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
523 
524 	part->internal.bdev.name = strdup(name);
525 	if (part->internal.bdev.name == NULL) {
526 		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
527 		return -1;
528 	}
529 
530 	part->internal.bdev.product_name = strdup(product_name);
531 	if (part->internal.bdev.product_name == NULL) {
532 		free(part->internal.bdev.name);
533 		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
534 			    spdk_bdev_get_name(base->bdev));
535 		return -1;
536 	}
537 
538 	base->ref++;
539 	part->internal.base = base;
540 
541 	if (!base->claimed) {
542 		int rc;
543 
544 		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
545 		if (rc) {
546 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
547 			free(part->internal.bdev.name);
548 			free(part->internal.bdev.product_name);
549 			base->ref--;
550 			return -1;
551 		}
552 		base->claimed = true;
553 		first_claimed = true;
554 	}
555 
556 	spdk_io_device_register(part, bdev_part_channel_create_cb,
557 				bdev_part_channel_destroy_cb,
558 				base->channel_size,
559 				name);
560 
561 	rc = spdk_bdev_register(&part->internal.bdev);
562 	if (rc == 0) {
563 		TAILQ_INSERT_TAIL(base->tailq, part, tailq);
564 	} else {
565 		spdk_io_device_unregister(part, NULL);
566 		if (--base->ref == 0) {
567 			spdk_bdev_module_release_bdev(base->bdev);
568 		}
569 		free(part->internal.bdev.name);
570 		free(part->internal.bdev.product_name);
571 		if (first_claimed == true) {
572 			base->claimed = false;
573 		}
574 	}
575 
576 	return rc;
577 }
578