xref: /spdk/lib/bdev/part.c (revision 712a3f69d32632bf6c862f00200f7f437d3f7529)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Common code for partition-like virtual bdevs.
36  */
37 
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
40 #include "spdk/log.h"
41 #include "spdk/string.h"
42 
43 #include "spdk/bdev_module.h"
44 
45 struct spdk_bdev_part_base {
46 	struct spdk_bdev		*bdev;
47 	struct spdk_bdev_desc		*desc;
48 	uint32_t			ref;
49 	uint32_t			channel_size;
50 	spdk_bdev_part_base_free_fn	base_free_fn;
51 	void				*ctx;
52 	bool				claimed;
53 	struct spdk_bdev_module		*module;
54 	struct spdk_bdev_fn_table	*fn_table;
55 	struct bdev_part_tailq		*tailq;
56 	spdk_io_channel_create_cb	ch_create_cb;
57 	spdk_io_channel_destroy_cb	ch_destroy_cb;
58 };
59 
60 struct spdk_bdev *
61 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
62 {
63 	return part_base->bdev;
64 }
65 
66 struct spdk_bdev_desc *
67 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
68 {
69 	return part_base->desc;
70 }
71 
72 struct bdev_part_tailq *
73 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
74 {
75 	return part_base->tailq;
76 }
77 
78 void *
79 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
80 {
81 	return part_base->ctx;
82 }
83 
84 const char *
85 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
86 {
87 	return part_base->bdev->name;
88 }
89 
90 void
91 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
92 {
93 	if (base->desc) {
94 		spdk_bdev_close(base->desc);
95 		base->desc = NULL;
96 	}
97 
98 	if (base->base_free_fn != NULL) {
99 		base->base_free_fn(base->ctx);
100 	}
101 
102 	free(base);
103 }
104 
105 static void
106 spdk_bdev_part_free_cb(void *io_device)
107 {
108 	struct spdk_bdev_part *part = io_device;
109 	struct spdk_bdev_part_base *base;
110 
111 	assert(part);
112 	assert(part->internal.base);
113 
114 	base = part->internal.base;
115 
116 	TAILQ_REMOVE(base->tailq, part, tailq);
117 
118 	if (--base->ref == 0) {
119 		spdk_bdev_module_release_bdev(base->bdev);
120 		spdk_bdev_part_base_free(base);
121 	}
122 
123 	spdk_bdev_destruct_done(&part->internal.bdev, 0);
124 	free(part->internal.bdev.name);
125 	free(part->internal.bdev.product_name);
126 	free(part);
127 }
128 
129 int
130 spdk_bdev_part_free(struct spdk_bdev_part *part)
131 {
132 	spdk_io_device_unregister(part, spdk_bdev_part_free_cb);
133 
134 	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
135 	 * until spdk_bdev_destruct_done is called */
136 	return 1;
137 }
138 
139 void
140 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
141 {
142 	struct spdk_bdev_part *part, *tmp;
143 
144 	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
145 		if (part->internal.base == part_base) {
146 			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
147 		}
148 	}
149 }
150 
151 static bool
152 spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
153 {
154 	struct spdk_bdev_part *part = _part;
155 
156 	/* We can't decode/modify passthrough NVMe commands, so don't report
157 	 *  that a partition supports these io types, even if the underlying
158 	 *  bdev does.
159 	 */
160 	switch (io_type) {
161 	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
162 	case SPDK_BDEV_IO_TYPE_NVME_IO:
163 	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
164 		return false;
165 	default:
166 		break;
167 	}
168 
169 	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
170 			io_type);
171 }
172 
/*
 * get_io_channel entry of the part function table: the part itself is the
 * io_device, so the channel is looked up with the part pointer.
 */
static struct spdk_io_channel *
spdk_bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *io_device = _part;
	struct spdk_io_channel *channel = spdk_get_io_channel(io_device);

	return channel;
}
180 
181 struct spdk_bdev *
182 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
183 {
184 	return &part->internal.bdev;
185 }
186 
187 struct spdk_bdev_part_base *
188 spdk_bdev_part_get_base(struct spdk_bdev_part *part)
189 {
190 	return part->internal.base;
191 }
192 
193 struct spdk_bdev *
194 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
195 {
196 	return part->internal.base->bdev;
197 }
198 
199 uint64_t
200 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
201 {
202 	return part->internal.offset_blocks;
203 }
204 
205 static int
206 spdk_bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
207 			 uint32_t remapped_offset)
208 {
209 	struct spdk_bdev *bdev = bdev_io->bdev;
210 	struct spdk_dif_ctx dif_ctx;
211 	struct spdk_dif_error err_blk = {};
212 	int rc;
213 
214 	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
215 		return 0;
216 	}
217 
218 	rc = spdk_dif_ctx_init(&dif_ctx,
219 			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
220 			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
221 			       offset, 0, 0, 0, 0);
222 	if (rc != 0) {
223 		SPDK_ERRLOG("Initialization of DIF context failed\n");
224 		return rc;
225 	}
226 
227 	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);
228 
229 	if (bdev->md_interleave) {
230 		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
231 					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
232 	} else {
233 		struct iovec md_iov = {
234 			.iov_base	= bdev_io->u.bdev.md_buf,
235 			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
236 		};
237 
238 		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
239 	}
240 
241 	if (rc != 0) {
242 		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
243 			    err_blk.err_type, err_blk.err_offset);
244 	}
245 
246 	return rc;
247 }
248 
249 static void
250 spdk_bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
251 {
252 	struct spdk_bdev_io *part_io = cb_arg;
253 	uint32_t offset, remapped_offset;
254 	int rc, status;
255 
256 	offset = bdev_io->u.bdev.offset_blocks;
257 	remapped_offset = part_io->u.bdev.offset_blocks;
258 
259 	if (success) {
260 		rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset);
261 		if (rc != 0) {
262 			success = false;
263 		}
264 	}
265 
266 	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
267 
268 	spdk_bdev_io_complete(part_io, status);
269 	spdk_bdev_free_io(bdev_io);
270 }
271 
272 static void
273 spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
274 {
275 	struct spdk_bdev_io *part_io = cb_arg;
276 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
277 
278 	spdk_bdev_io_complete(part_io, status);
279 	spdk_bdev_free_io(bdev_io);
280 }
281 
282 static void
283 spdk_bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
284 {
285 	struct spdk_bdev_io *part_io = cb_arg;
286 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
287 
288 	spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
289 	spdk_bdev_io_complete(part_io, status);
290 	spdk_bdev_free_io(bdev_io);
291 }
292 
293 int
294 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
295 {
296 	struct spdk_bdev_part *part = ch->part;
297 	struct spdk_io_channel *base_ch = ch->base_ch;
298 	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
299 	uint64_t offset, remapped_offset;
300 	int rc = 0;
301 
302 	offset = bdev_io->u.bdev.offset_blocks;
303 	remapped_offset = offset + part->internal.offset_blocks;
304 
305 	/* Modify the I/O to adjust for the offset within the base bdev. */
306 	switch (bdev_io->type) {
307 	case SPDK_BDEV_IO_TYPE_READ:
308 		if (bdev_io->u.bdev.md_buf == NULL) {
309 			rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
310 						    bdev_io->u.bdev.iovcnt, remapped_offset,
311 						    bdev_io->u.bdev.num_blocks,
312 						    spdk_bdev_part_complete_read_io, bdev_io);
313 		} else {
314 			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
315 							    bdev_io->u.bdev.iovs,
316 							    bdev_io->u.bdev.iovcnt,
317 							    bdev_io->u.bdev.md_buf, remapped_offset,
318 							    bdev_io->u.bdev.num_blocks,
319 							    spdk_bdev_part_complete_read_io, bdev_io);
320 		}
321 		break;
322 	case SPDK_BDEV_IO_TYPE_WRITE:
323 		rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset);
324 		if (rc != 0) {
325 			return SPDK_BDEV_IO_STATUS_FAILED;
326 		}
327 
328 		if (bdev_io->u.bdev.md_buf == NULL) {
329 			rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
330 						     bdev_io->u.bdev.iovcnt, remapped_offset,
331 						     bdev_io->u.bdev.num_blocks,
332 						     spdk_bdev_part_complete_io, bdev_io);
333 		} else {
334 			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
335 							     bdev_io->u.bdev.iovs,
336 							     bdev_io->u.bdev.iovcnt,
337 							     bdev_io->u.bdev.md_buf, remapped_offset,
338 							     bdev_io->u.bdev.num_blocks,
339 							     spdk_bdev_part_complete_io, bdev_io);
340 		}
341 		break;
342 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
343 		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
344 						   bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
345 						   bdev_io);
346 		break;
347 	case SPDK_BDEV_IO_TYPE_UNMAP:
348 		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
349 					    bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
350 					    bdev_io);
351 		break;
352 	case SPDK_BDEV_IO_TYPE_FLUSH:
353 		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
354 					    bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
355 					    bdev_io);
356 		break;
357 	case SPDK_BDEV_IO_TYPE_RESET:
358 		rc = spdk_bdev_reset(base_desc, base_ch,
359 				     spdk_bdev_part_complete_io, bdev_io);
360 		break;
361 	case SPDK_BDEV_IO_TYPE_ZCOPY:
362 		rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset,
363 					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
364 					   spdk_bdev_part_complete_zcopy_io, bdev_io);
365 		break;
366 	default:
367 		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
368 		return SPDK_BDEV_IO_STATUS_FAILED;
369 	}
370 
371 	return rc;
372 }
373 
374 static int
375 spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
376 {
377 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
378 	struct spdk_bdev_part_channel *ch = ctx_buf;
379 
380 	ch->part = part;
381 	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
382 	if (ch->base_ch == NULL) {
383 		return -1;
384 	}
385 
386 	if (part->internal.base->ch_create_cb) {
387 		return part->internal.base->ch_create_cb(io_device, ctx_buf);
388 	} else {
389 		return 0;
390 	}
391 }
392 
393 static void
394 spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
395 {
396 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
397 	struct spdk_bdev_part_channel *ch = ctx_buf;
398 
399 	if (part->internal.base->ch_destroy_cb) {
400 		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
401 	}
402 	spdk_put_io_channel(ch->base_ch);
403 }
404 
405 struct spdk_bdev_part_base *
406 	spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
407 			      spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
408 			      struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
409 			      spdk_bdev_part_base_free_fn free_fn, void *ctx,
410 			      uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
411 			      spdk_io_channel_destroy_cb ch_destroy_cb)
412 {
413 	int rc;
414 	struct spdk_bdev_part_base *base;
415 
416 	base = calloc(1, sizeof(*base));
417 	if (!base) {
418 		SPDK_ERRLOG("Memory allocation failure\n");
419 		return NULL;
420 	}
421 	fn_table->get_io_channel = spdk_bdev_part_get_io_channel;
422 	fn_table->io_type_supported = spdk_bdev_part_io_type_supported;
423 
424 	base->bdev = bdev;
425 	base->desc = NULL;
426 	base->ref = 0;
427 	base->module = module;
428 	base->fn_table = fn_table;
429 	base->tailq = tailq;
430 	base->base_free_fn = free_fn;
431 	base->ctx = ctx;
432 	base->claimed = false;
433 	base->channel_size = channel_size;
434 	base->ch_create_cb = ch_create_cb;
435 	base->ch_destroy_cb = ch_destroy_cb;
436 
437 	rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
438 	if (rc) {
439 		spdk_bdev_part_base_free(base);
440 		SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
441 			    spdk_strerror(-rc));
442 		return NULL;
443 	}
444 
445 	return base;
446 }
447 
448 int
449 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
450 			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
451 			 char *product_name)
452 {
453 	part->internal.bdev.blocklen = base->bdev->blocklen;
454 	part->internal.bdev.blockcnt = num_blocks;
455 	part->internal.offset_blocks = offset_blocks;
456 
457 	part->internal.bdev.write_cache = base->bdev->write_cache;
458 	part->internal.bdev.required_alignment = base->bdev->required_alignment;
459 	part->internal.bdev.ctxt = part;
460 	part->internal.bdev.module = base->module;
461 	part->internal.bdev.fn_table = base->fn_table;
462 
463 	part->internal.bdev.md_interleave = base->bdev->md_interleave;
464 	part->internal.bdev.md_len = base->bdev->md_len;
465 	part->internal.bdev.dif_type = base->bdev->dif_type;
466 	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
467 	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
468 
469 	part->internal.bdev.name = strdup(name);
470 	part->internal.bdev.product_name = strdup(product_name);
471 
472 	if (part->internal.bdev.name == NULL) {
473 		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
474 		return -1;
475 	} else if (part->internal.bdev.product_name == NULL) {
476 		free(part->internal.bdev.name);
477 		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
478 			    spdk_bdev_get_name(base->bdev));
479 		return -1;
480 	}
481 
482 	base->ref++;
483 	part->internal.base = base;
484 
485 	if (!base->claimed) {
486 		int rc;
487 
488 		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
489 		if (rc) {
490 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
491 			free(part->internal.bdev.name);
492 			free(part->internal.bdev.product_name);
493 			return -1;
494 		}
495 		base->claimed = true;
496 	}
497 
498 	spdk_io_device_register(part, spdk_bdev_part_channel_create_cb,
499 				spdk_bdev_part_channel_destroy_cb,
500 				base->channel_size,
501 				name);
502 
503 	spdk_bdev_register(&part->internal.bdev);
504 	TAILQ_INSERT_TAIL(base->tailq, part, tailq);
505 
506 	return 0;
507 }
508