xref: /spdk/lib/bdev/part.c (revision 10ba9348459cc7222973aa7a1f3316ce2240f5c9)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Common code for partition-like virtual bdevs.
36  */
37 
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
40 #include "spdk/log.h"
41 #include "spdk/string.h"
42 #include "spdk/thread.h"
43 
44 #include "spdk/bdev_module.h"
45 
46 struct spdk_bdev_part_base {
47 	struct spdk_bdev		*bdev;
48 	struct spdk_bdev_desc		*desc;
49 	uint32_t			ref;
50 	uint32_t			channel_size;
51 	spdk_bdev_part_base_free_fn	base_free_fn;
52 	void				*ctx;
53 	bool				claimed;
54 	struct spdk_bdev_module		*module;
55 	struct spdk_bdev_fn_table	*fn_table;
56 	struct bdev_part_tailq		*tailq;
57 	spdk_io_channel_create_cb	ch_create_cb;
58 	spdk_io_channel_destroy_cb	ch_destroy_cb;
59 	spdk_bdev_remove_cb_t		remove_cb;
60 	struct spdk_thread		*thread;
61 };
62 
63 struct spdk_bdev *
64 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
65 {
66 	return part_base->bdev;
67 }
68 
69 struct spdk_bdev_desc *
70 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
71 {
72 	return part_base->desc;
73 }
74 
75 struct bdev_part_tailq *
76 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
77 {
78 	return part_base->tailq;
79 }
80 
81 void *
82 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
83 {
84 	return part_base->ctx;
85 }
86 
87 const char *
88 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
89 {
90 	return part_base->bdev->name;
91 }
92 
/*
 * Thread message handler: close the bdev descriptor passed as ctx.
 * Used to perform the close on the thread that opened the descriptor.
 */
static void
bdev_part_base_free(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
100 
101 void
102 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
103 {
104 	if (base->desc) {
105 		/* Close the underlying bdev on its same opened thread. */
106 		if (base->thread && base->thread != spdk_get_thread()) {
107 			spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
108 		} else {
109 			spdk_bdev_close(base->desc);
110 		}
111 	}
112 
113 	if (base->base_free_fn != NULL) {
114 		base->base_free_fn(base->ctx);
115 	}
116 
117 	free(base);
118 }
119 
120 static void
121 bdev_part_free_cb(void *io_device)
122 {
123 	struct spdk_bdev_part *part = io_device;
124 	struct spdk_bdev_part_base *base;
125 
126 	assert(part);
127 	assert(part->internal.base);
128 
129 	base = part->internal.base;
130 
131 	TAILQ_REMOVE(base->tailq, part, tailq);
132 
133 	if (--base->ref == 0) {
134 		spdk_bdev_module_release_bdev(base->bdev);
135 		spdk_bdev_part_base_free(base);
136 	}
137 
138 	spdk_bdev_destruct_done(&part->internal.bdev, 0);
139 	free(part->internal.bdev.name);
140 	free(part->internal.bdev.product_name);
141 	free(part);
142 }
143 
144 int
145 spdk_bdev_part_free(struct spdk_bdev_part *part)
146 {
147 	spdk_io_device_unregister(part, bdev_part_free_cb);
148 
149 	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
150 	 * until spdk_bdev_destruct_done is called */
151 	return 1;
152 }
153 
154 void
155 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
156 {
157 	struct spdk_bdev_part *part, *tmp;
158 
159 	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
160 		if (part->internal.base == part_base) {
161 			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
162 		}
163 	}
164 }
165 
166 static bool
167 bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
168 {
169 	struct spdk_bdev_part *part = _part;
170 
171 	/* We can't decode/modify passthrough NVMe commands, so don't report
172 	 *  that a partition supports these io types, even if the underlying
173 	 *  bdev does.
174 	 */
175 	switch (io_type) {
176 	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
177 	case SPDK_BDEV_IO_TYPE_NVME_IO:
178 	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
179 		return false;
180 	default:
181 		break;
182 	}
183 
184 	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
185 			io_type);
186 }
187 
/*
 * fn_table hook: obtain an I/O channel for a part. The part pointer itself
 * is the io_device key, so it is passed straight through.
 */
static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	return spdk_get_io_channel(_part);
}
195 
196 struct spdk_bdev *
197 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
198 {
199 	return &part->internal.bdev;
200 }
201 
202 struct spdk_bdev_part_base *
203 spdk_bdev_part_get_base(struct spdk_bdev_part *part)
204 {
205 	return part->internal.base;
206 }
207 
208 struct spdk_bdev *
209 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
210 {
211 	return part->internal.base->bdev;
212 }
213 
214 uint64_t
215 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
216 {
217 	return part->internal.offset_blocks;
218 }
219 
220 static int
221 bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
222 		    uint32_t remapped_offset)
223 {
224 	struct spdk_bdev *bdev = bdev_io->bdev;
225 	struct spdk_dif_ctx dif_ctx;
226 	struct spdk_dif_error err_blk = {};
227 	int rc;
228 
229 	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
230 		return 0;
231 	}
232 
233 	rc = spdk_dif_ctx_init(&dif_ctx,
234 			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
235 			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
236 			       offset, 0, 0, 0, 0);
237 	if (rc != 0) {
238 		SPDK_ERRLOG("Initialization of DIF context failed\n");
239 		return rc;
240 	}
241 
242 	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);
243 
244 	if (bdev->md_interleave) {
245 		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
246 					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
247 	} else {
248 		struct iovec md_iov = {
249 			.iov_base	= bdev_io->u.bdev.md_buf,
250 			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
251 		};
252 
253 		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
254 	}
255 
256 	if (rc != 0) {
257 		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
258 			    err_blk.err_type, err_blk.err_offset);
259 	}
260 
261 	return rc;
262 }
263 
264 static void
265 bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
266 {
267 	struct spdk_bdev_io *part_io = cb_arg;
268 	uint32_t offset, remapped_offset;
269 	int rc, status;
270 
271 	offset = bdev_io->u.bdev.offset_blocks;
272 	remapped_offset = part_io->u.bdev.offset_blocks;
273 
274 	if (success) {
275 		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
276 		if (rc != 0) {
277 			success = false;
278 		}
279 	}
280 
281 	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
282 
283 	spdk_bdev_io_complete(part_io, status);
284 	spdk_bdev_free_io(bdev_io);
285 }
286 
287 static void
288 bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
289 {
290 	struct spdk_bdev_io *part_io = cb_arg;
291 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
292 
293 	spdk_bdev_io_complete(part_io, status);
294 	spdk_bdev_free_io(bdev_io);
295 }
296 
297 static void
298 bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
299 {
300 	struct spdk_bdev_io *part_io = cb_arg;
301 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
302 
303 	spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
304 	spdk_bdev_io_complete(part_io, status);
305 	spdk_bdev_free_io(bdev_io);
306 }
307 
308 int
309 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
310 {
311 	struct spdk_bdev_part *part = ch->part;
312 	struct spdk_io_channel *base_ch = ch->base_ch;
313 	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
314 	uint64_t offset, remapped_offset;
315 	int rc = 0;
316 
317 	offset = bdev_io->u.bdev.offset_blocks;
318 	remapped_offset = offset + part->internal.offset_blocks;
319 
320 	/* Modify the I/O to adjust for the offset within the base bdev. */
321 	switch (bdev_io->type) {
322 	case SPDK_BDEV_IO_TYPE_READ:
323 		if (bdev_io->u.bdev.ext_opts || !bdev_io->u.bdev.md_buf) {
324 			rc = spdk_bdev_readv_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
325 							bdev_io->u.bdev.iovcnt, remapped_offset,
326 							bdev_io->u.bdev.num_blocks,
327 							bdev_part_complete_read_io, bdev_io,
328 							bdev_io->u.bdev.ext_opts);
329 		} else {
330 			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
331 							    bdev_io->u.bdev.iovs,
332 							    bdev_io->u.bdev.iovcnt,
333 							    bdev_io->u.bdev.md_buf, remapped_offset,
334 							    bdev_io->u.bdev.num_blocks,
335 							    bdev_part_complete_read_io, bdev_io);
336 		}
337 		break;
338 	case SPDK_BDEV_IO_TYPE_WRITE:
339 		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
340 		if (rc != 0) {
341 			return SPDK_BDEV_IO_STATUS_FAILED;
342 		}
343 
344 		if (bdev_io->u.bdev.ext_opts || !bdev_io->u.bdev.md_buf) {
345 			rc = spdk_bdev_writev_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
346 							 bdev_io->u.bdev.iovcnt, remapped_offset,
347 							 bdev_io->u.bdev.num_blocks,
348 							 bdev_part_complete_io, bdev_io,
349 							 bdev_io->u.bdev.ext_opts);
350 		} else {
351 			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
352 							     bdev_io->u.bdev.iovs,
353 							     bdev_io->u.bdev.iovcnt,
354 							     bdev_io->u.bdev.md_buf, remapped_offset,
355 							     bdev_io->u.bdev.num_blocks,
356 							     bdev_part_complete_io, bdev_io);
357 		}
358 		break;
359 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
360 		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
361 						   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
362 						   bdev_io);
363 		break;
364 	case SPDK_BDEV_IO_TYPE_UNMAP:
365 		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
366 					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
367 					    bdev_io);
368 		break;
369 	case SPDK_BDEV_IO_TYPE_FLUSH:
370 		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
371 					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
372 					    bdev_io);
373 		break;
374 	case SPDK_BDEV_IO_TYPE_RESET:
375 		rc = spdk_bdev_reset(base_desc, base_ch,
376 				     bdev_part_complete_io, bdev_io);
377 		break;
378 	case SPDK_BDEV_IO_TYPE_ZCOPY:
379 		rc = spdk_bdev_zcopy_start(base_desc, base_ch, NULL, 0, remapped_offset,
380 					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
381 					   bdev_part_complete_zcopy_io, bdev_io);
382 		break;
383 	case SPDK_BDEV_IO_TYPE_COMPARE:
384 		if (!bdev_io->u.bdev.md_buf) {
385 			rc = spdk_bdev_comparev_blocks(base_desc, base_ch,
386 						       bdev_io->u.bdev.iovs,
387 						       bdev_io->u.bdev.iovcnt,
388 						       remapped_offset,
389 						       bdev_io->u.bdev.num_blocks,
390 						       bdev_part_complete_io, bdev_io);
391 		} else {
392 			rc = spdk_bdev_comparev_blocks_with_md(base_desc, base_ch,
393 							       bdev_io->u.bdev.iovs,
394 							       bdev_io->u.bdev.iovcnt,
395 							       bdev_io->u.bdev.md_buf,
396 							       remapped_offset,
397 							       bdev_io->u.bdev.num_blocks,
398 							       bdev_part_complete_io, bdev_io);
399 		}
400 		break;
401 	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
402 		rc = spdk_bdev_comparev_and_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
403 				bdev_io->u.bdev.iovcnt,
404 				bdev_io->u.bdev.fused_iovs,
405 				bdev_io->u.bdev.fused_iovcnt,
406 				remapped_offset,
407 				bdev_io->u.bdev.num_blocks,
408 				bdev_part_complete_io, bdev_io);
409 		break;
410 	default:
411 		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
412 		return SPDK_BDEV_IO_STATUS_FAILED;
413 	}
414 
415 	return rc;
416 }
417 
418 static int
419 bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
420 {
421 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
422 	struct spdk_bdev_part_channel *ch = ctx_buf;
423 
424 	ch->part = part;
425 	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
426 	if (ch->base_ch == NULL) {
427 		return -1;
428 	}
429 
430 	if (part->internal.base->ch_create_cb) {
431 		return part->internal.base->ch_create_cb(io_device, ctx_buf);
432 	} else {
433 		return 0;
434 	}
435 }
436 
437 static void
438 bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
439 {
440 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
441 	struct spdk_bdev_part_channel *ch = ctx_buf;
442 
443 	if (part->internal.base->ch_destroy_cb) {
444 		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
445 	}
446 	spdk_put_io_channel(ch->base_ch);
447 }
448 
449 static void
450 bdev_part_base_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
451 			void *event_ctx)
452 {
453 	struct spdk_bdev_part_base *base = event_ctx;
454 
455 	switch (type) {
456 	case SPDK_BDEV_EVENT_REMOVE:
457 		base->remove_cb(base);
458 		break;
459 	default:
460 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
461 		break;
462 	}
463 }
464 
465 int
466 spdk_bdev_part_base_construct_ext(const char *bdev_name,
467 				  spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
468 				  struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
469 				  spdk_bdev_part_base_free_fn free_fn, void *ctx,
470 				  uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
471 				  spdk_io_channel_destroy_cb ch_destroy_cb,
472 				  struct spdk_bdev_part_base **_base)
473 {
474 	int rc;
475 	struct spdk_bdev_part_base *base;
476 
477 	if (_base == NULL) {
478 		return -EINVAL;
479 	}
480 
481 	base = calloc(1, sizeof(*base));
482 	if (!base) {
483 		SPDK_ERRLOG("Memory allocation failure\n");
484 		return -ENOMEM;
485 	}
486 	fn_table->get_io_channel = bdev_part_get_io_channel;
487 	fn_table->io_type_supported = bdev_part_io_type_supported;
488 
489 	base->desc = NULL;
490 	base->ref = 0;
491 	base->module = module;
492 	base->fn_table = fn_table;
493 	base->tailq = tailq;
494 	base->base_free_fn = free_fn;
495 	base->ctx = ctx;
496 	base->claimed = false;
497 	base->channel_size = channel_size;
498 	base->ch_create_cb = ch_create_cb;
499 	base->ch_destroy_cb = ch_destroy_cb;
500 	base->remove_cb = remove_cb;
501 
502 	rc = spdk_bdev_open_ext(bdev_name, false, bdev_part_base_event_cb, base, &base->desc);
503 	if (rc) {
504 		if (rc == -ENODEV) {
505 			free(base);
506 		} else {
507 			SPDK_ERRLOG("could not open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
508 			spdk_bdev_part_base_free(base);
509 		}
510 		return rc;
511 	}
512 
513 	base->bdev = spdk_bdev_desc_get_bdev(base->desc);
514 
515 	/* Save the thread where the base device is opened */
516 	base->thread = spdk_get_thread();
517 
518 	*_base = base;
519 
520 	return 0;
521 }
522 
523 int
524 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
525 			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
526 			 char *product_name)
527 {
528 	part->internal.bdev.blocklen = base->bdev->blocklen;
529 	part->internal.bdev.blockcnt = num_blocks;
530 	part->internal.offset_blocks = offset_blocks;
531 
532 	part->internal.bdev.write_cache = base->bdev->write_cache;
533 	part->internal.bdev.required_alignment = base->bdev->required_alignment;
534 	part->internal.bdev.ctxt = part;
535 	part->internal.bdev.module = base->module;
536 	part->internal.bdev.fn_table = base->fn_table;
537 
538 	part->internal.bdev.md_interleave = base->bdev->md_interleave;
539 	part->internal.bdev.md_len = base->bdev->md_len;
540 	part->internal.bdev.dif_type = base->bdev->dif_type;
541 	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
542 	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
543 
544 	part->internal.bdev.name = strdup(name);
545 	if (part->internal.bdev.name == NULL) {
546 		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
547 		return -1;
548 	}
549 
550 	part->internal.bdev.product_name = strdup(product_name);
551 	if (part->internal.bdev.product_name == NULL) {
552 		free(part->internal.bdev.name);
553 		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
554 			    spdk_bdev_get_name(base->bdev));
555 		return -1;
556 	}
557 
558 	base->ref++;
559 	part->internal.base = base;
560 
561 	if (!base->claimed) {
562 		int rc;
563 
564 		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
565 		if (rc) {
566 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
567 			free(part->internal.bdev.name);
568 			free(part->internal.bdev.product_name);
569 			return -1;
570 		}
571 		base->claimed = true;
572 	}
573 
574 	spdk_io_device_register(part, bdev_part_channel_create_cb,
575 				bdev_part_channel_destroy_cb,
576 				base->channel_size,
577 				name);
578 
579 	spdk_bdev_register(&part->internal.bdev);
580 	TAILQ_INSERT_TAIL(base->tailq, part, tailq);
581 
582 	return 0;
583 }
584