xref: /spdk/lib/bdev/part.c (revision 2172c432cfdaecc5a279d64e37c6b51e794683c1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Common code for partition-like virtual bdevs.
36  */
37 
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
40 #include "spdk/log.h"
41 #include "spdk/string.h"
42 #include "spdk/thread.h"
43 
44 #include "spdk/bdev_module.h"
45 
46 struct spdk_bdev_part_base {
47 	struct spdk_bdev		*bdev;
48 	struct spdk_bdev_desc		*desc;
49 	uint32_t			ref;
50 	uint32_t			channel_size;
51 	spdk_bdev_part_base_free_fn	base_free_fn;
52 	void				*ctx;
53 	bool				claimed;
54 	struct spdk_bdev_module		*module;
55 	struct spdk_bdev_fn_table	*fn_table;
56 	struct bdev_part_tailq		*tailq;
57 	spdk_io_channel_create_cb	ch_create_cb;
58 	spdk_io_channel_destroy_cb	ch_destroy_cb;
59 	struct spdk_thread		*thread;
60 };
61 
62 struct spdk_bdev *
63 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
64 {
65 	return part_base->bdev;
66 }
67 
68 struct spdk_bdev_desc *
69 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
70 {
71 	return part_base->desc;
72 }
73 
74 struct bdev_part_tailq *
75 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
76 {
77 	return part_base->tailq;
78 }
79 
80 void *
81 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
82 {
83 	return part_base->ctx;
84 }
85 
86 const char *
87 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
88 {
89 	return part_base->bdev->name;
90 }
91 
/*
 * Thread-message handler used by spdk_bdev_part_base_free(): ctx is the
 * base bdev descriptor, which is closed here.
 */
static void
bdev_part_base_free(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
99 
100 void
101 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
102 {
103 	if (base->desc) {
104 		/* Close the underlying bdev on its same opened thread. */
105 		if (base->thread && base->thread != spdk_get_thread()) {
106 			spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
107 		} else {
108 			spdk_bdev_close(base->desc);
109 		}
110 	}
111 
112 	if (base->base_free_fn != NULL) {
113 		base->base_free_fn(base->ctx);
114 	}
115 
116 	free(base);
117 }
118 
119 static void
120 bdev_part_free_cb(void *io_device)
121 {
122 	struct spdk_bdev_part *part = io_device;
123 	struct spdk_bdev_part_base *base;
124 
125 	assert(part);
126 	assert(part->internal.base);
127 
128 	base = part->internal.base;
129 
130 	TAILQ_REMOVE(base->tailq, part, tailq);
131 
132 	if (--base->ref == 0) {
133 		spdk_bdev_module_release_bdev(base->bdev);
134 		spdk_bdev_part_base_free(base);
135 	}
136 
137 	spdk_bdev_destruct_done(&part->internal.bdev, 0);
138 	free(part->internal.bdev.name);
139 	free(part->internal.bdev.product_name);
140 	free(part);
141 }
142 
143 int
144 spdk_bdev_part_free(struct spdk_bdev_part *part)
145 {
146 	spdk_io_device_unregister(part, bdev_part_free_cb);
147 
148 	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
149 	 * until spdk_bdev_destruct_done is called */
150 	return 1;
151 }
152 
153 void
154 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
155 {
156 	struct spdk_bdev_part *part, *tmp;
157 
158 	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
159 		if (part->internal.base == part_base) {
160 			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
161 		}
162 	}
163 }
164 
165 static bool
166 bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
167 {
168 	struct spdk_bdev_part *part = _part;
169 
170 	/* We can't decode/modify passthrough NVMe commands, so don't report
171 	 *  that a partition supports these io types, even if the underlying
172 	 *  bdev does.
173 	 */
174 	switch (io_type) {
175 	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
176 	case SPDK_BDEV_IO_TYPE_NVME_IO:
177 	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
178 		return false;
179 	default:
180 		break;
181 	}
182 
183 	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
184 			io_type);
185 }
186 
/*
 * get_io_channel entry of the part function table: the part itself is the
 * io_device, so its channel is obtained via spdk_get_io_channel().
 */
static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *io_device = _part;
	struct spdk_io_channel *ch = spdk_get_io_channel(io_device);

	return ch;
}
194 
195 struct spdk_bdev *
196 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
197 {
198 	return &part->internal.bdev;
199 }
200 
201 struct spdk_bdev_part_base *
202 spdk_bdev_part_get_base(struct spdk_bdev_part *part)
203 {
204 	return part->internal.base;
205 }
206 
207 struct spdk_bdev *
208 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
209 {
210 	return part->internal.base->bdev;
211 }
212 
213 uint64_t
214 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
215 {
216 	return part->internal.offset_blocks;
217 }
218 
219 static int
220 bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
221 		    uint32_t remapped_offset)
222 {
223 	struct spdk_bdev *bdev = bdev_io->bdev;
224 	struct spdk_dif_ctx dif_ctx;
225 	struct spdk_dif_error err_blk = {};
226 	int rc;
227 
228 	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
229 		return 0;
230 	}
231 
232 	rc = spdk_dif_ctx_init(&dif_ctx,
233 			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
234 			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
235 			       offset, 0, 0, 0, 0);
236 	if (rc != 0) {
237 		SPDK_ERRLOG("Initialization of DIF context failed\n");
238 		return rc;
239 	}
240 
241 	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);
242 
243 	if (bdev->md_interleave) {
244 		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
245 					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
246 	} else {
247 		struct iovec md_iov = {
248 			.iov_base	= bdev_io->u.bdev.md_buf,
249 			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
250 		};
251 
252 		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
253 	}
254 
255 	if (rc != 0) {
256 		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
257 			    err_blk.err_type, err_blk.err_offset);
258 	}
259 
260 	return rc;
261 }
262 
263 static void
264 bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
265 {
266 	struct spdk_bdev_io *part_io = cb_arg;
267 	uint32_t offset, remapped_offset;
268 	int rc, status;
269 
270 	offset = bdev_io->u.bdev.offset_blocks;
271 	remapped_offset = part_io->u.bdev.offset_blocks;
272 
273 	if (success) {
274 		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
275 		if (rc != 0) {
276 			success = false;
277 		}
278 	}
279 
280 	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
281 
282 	spdk_bdev_io_complete(part_io, status);
283 	spdk_bdev_free_io(bdev_io);
284 }
285 
286 static void
287 bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
288 {
289 	struct spdk_bdev_io *part_io = cb_arg;
290 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
291 
292 	spdk_bdev_io_complete(part_io, status);
293 	spdk_bdev_free_io(bdev_io);
294 }
295 
296 static void
297 bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
298 {
299 	struct spdk_bdev_io *part_io = cb_arg;
300 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
301 
302 	spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
303 	spdk_bdev_io_complete(part_io, status);
304 	spdk_bdev_free_io(bdev_io);
305 }
306 
307 int
308 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
309 {
310 	struct spdk_bdev_part *part = ch->part;
311 	struct spdk_io_channel *base_ch = ch->base_ch;
312 	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
313 	uint64_t offset, remapped_offset;
314 	int rc = 0;
315 
316 	offset = bdev_io->u.bdev.offset_blocks;
317 	remapped_offset = offset + part->internal.offset_blocks;
318 
319 	/* Modify the I/O to adjust for the offset within the base bdev. */
320 	switch (bdev_io->type) {
321 	case SPDK_BDEV_IO_TYPE_READ:
322 		if (bdev_io->u.bdev.md_buf == NULL) {
323 			rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
324 						    bdev_io->u.bdev.iovcnt, remapped_offset,
325 						    bdev_io->u.bdev.num_blocks,
326 						    bdev_part_complete_read_io, bdev_io);
327 		} else {
328 			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
329 							    bdev_io->u.bdev.iovs,
330 							    bdev_io->u.bdev.iovcnt,
331 							    bdev_io->u.bdev.md_buf, remapped_offset,
332 							    bdev_io->u.bdev.num_blocks,
333 							    bdev_part_complete_read_io, bdev_io);
334 		}
335 		break;
336 	case SPDK_BDEV_IO_TYPE_WRITE:
337 		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
338 		if (rc != 0) {
339 			return SPDK_BDEV_IO_STATUS_FAILED;
340 		}
341 
342 		if (bdev_io->u.bdev.md_buf == NULL) {
343 			rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
344 						     bdev_io->u.bdev.iovcnt, remapped_offset,
345 						     bdev_io->u.bdev.num_blocks,
346 						     bdev_part_complete_io, bdev_io);
347 		} else {
348 			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
349 							     bdev_io->u.bdev.iovs,
350 							     bdev_io->u.bdev.iovcnt,
351 							     bdev_io->u.bdev.md_buf, remapped_offset,
352 							     bdev_io->u.bdev.num_blocks,
353 							     bdev_part_complete_io, bdev_io);
354 		}
355 		break;
356 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
357 		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
358 						   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
359 						   bdev_io);
360 		break;
361 	case SPDK_BDEV_IO_TYPE_UNMAP:
362 		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
363 					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
364 					    bdev_io);
365 		break;
366 	case SPDK_BDEV_IO_TYPE_FLUSH:
367 		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
368 					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
369 					    bdev_io);
370 		break;
371 	case SPDK_BDEV_IO_TYPE_RESET:
372 		rc = spdk_bdev_reset(base_desc, base_ch,
373 				     bdev_part_complete_io, bdev_io);
374 		break;
375 	case SPDK_BDEV_IO_TYPE_ZCOPY:
376 		rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset,
377 					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
378 					   bdev_part_complete_zcopy_io, bdev_io);
379 		break;
380 	default:
381 		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
382 		return SPDK_BDEV_IO_STATUS_FAILED;
383 	}
384 
385 	return rc;
386 }
387 
388 static int
389 bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
390 {
391 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
392 	struct spdk_bdev_part_channel *ch = ctx_buf;
393 
394 	ch->part = part;
395 	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
396 	if (ch->base_ch == NULL) {
397 		return -1;
398 	}
399 
400 	if (part->internal.base->ch_create_cb) {
401 		return part->internal.base->ch_create_cb(io_device, ctx_buf);
402 	} else {
403 		return 0;
404 	}
405 }
406 
407 static void
408 bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
409 {
410 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
411 	struct spdk_bdev_part_channel *ch = ctx_buf;
412 
413 	if (part->internal.base->ch_destroy_cb) {
414 		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
415 	}
416 	spdk_put_io_channel(ch->base_ch);
417 }
418 
419 struct spdk_bdev_part_base *
420 	spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
421 			      spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
422 			      struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
423 			      spdk_bdev_part_base_free_fn free_fn, void *ctx,
424 			      uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
425 			      spdk_io_channel_destroy_cb ch_destroy_cb)
426 {
427 	int rc;
428 	struct spdk_bdev_part_base *base;
429 
430 	base = calloc(1, sizeof(*base));
431 	if (!base) {
432 		SPDK_ERRLOG("Memory allocation failure\n");
433 		return NULL;
434 	}
435 	fn_table->get_io_channel = bdev_part_get_io_channel;
436 	fn_table->io_type_supported = bdev_part_io_type_supported;
437 
438 	base->bdev = bdev;
439 	base->desc = NULL;
440 	base->ref = 0;
441 	base->module = module;
442 	base->fn_table = fn_table;
443 	base->tailq = tailq;
444 	base->base_free_fn = free_fn;
445 	base->ctx = ctx;
446 	base->claimed = false;
447 	base->channel_size = channel_size;
448 	base->ch_create_cb = ch_create_cb;
449 	base->ch_destroy_cb = ch_destroy_cb;
450 
451 	rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
452 	if (rc) {
453 		spdk_bdev_part_base_free(base);
454 		SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
455 			    spdk_strerror(-rc));
456 		return NULL;
457 	}
458 
459 	/* Save the thread where the base device is opened */
460 	base->thread = spdk_get_thread();
461 
462 	return base;
463 }
464 
465 int
466 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
467 			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
468 			 char *product_name)
469 {
470 	part->internal.bdev.blocklen = base->bdev->blocklen;
471 	part->internal.bdev.blockcnt = num_blocks;
472 	part->internal.offset_blocks = offset_blocks;
473 
474 	part->internal.bdev.write_cache = base->bdev->write_cache;
475 	part->internal.bdev.required_alignment = base->bdev->required_alignment;
476 	part->internal.bdev.ctxt = part;
477 	part->internal.bdev.module = base->module;
478 	part->internal.bdev.fn_table = base->fn_table;
479 
480 	part->internal.bdev.md_interleave = base->bdev->md_interleave;
481 	part->internal.bdev.md_len = base->bdev->md_len;
482 	part->internal.bdev.dif_type = base->bdev->dif_type;
483 	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
484 	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
485 
486 	part->internal.bdev.name = strdup(name);
487 	if (part->internal.bdev.name == NULL) {
488 		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
489 		return -1;
490 	}
491 
492 	part->internal.bdev.product_name = strdup(product_name);
493 	if (part->internal.bdev.product_name == NULL) {
494 		free(part->internal.bdev.name);
495 		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
496 			    spdk_bdev_get_name(base->bdev));
497 		return -1;
498 	}
499 
500 	base->ref++;
501 	part->internal.base = base;
502 
503 	if (!base->claimed) {
504 		int rc;
505 
506 		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
507 		if (rc) {
508 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
509 			free(part->internal.bdev.name);
510 			free(part->internal.bdev.product_name);
511 			return -1;
512 		}
513 		base->claimed = true;
514 	}
515 
516 	spdk_io_device_register(part, bdev_part_channel_create_cb,
517 				bdev_part_channel_destroy_cb,
518 				base->channel_size,
519 				name);
520 
521 	spdk_bdev_register(&part->internal.bdev);
522 	TAILQ_INSERT_TAIL(base->tailq, part, tailq);
523 
524 	return 0;
525 }
526