xref: /spdk/lib/bdev/part.c (revision ae7b5890ef728af40bd233a5011b924c482603bf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Common code for partition-like virtual bdevs.
36  */
37 
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
40 #include "spdk/log.h"
41 #include "spdk/string.h"
42 
43 #include "spdk/bdev_module.h"
44 
/* Shared state for all parts carved out of one underlying (base) bdev. */
struct spdk_bdev_part_base {
	struct spdk_bdev		*bdev;		/* the underlying bdev the parts are built on */
	struct spdk_bdev_desc		*desc;		/* open descriptor on that bdev */
	uint32_t			ref;		/* number of live parts; last one to go frees the base */
	uint32_t			channel_size;	/* per-channel ctx size passed to spdk_io_device_register() */
	spdk_bdev_part_base_free_fn	base_free_fn;	/* optional module hook invoked when the base is freed */
	void				*ctx;		/* module-private context, handed to base_free_fn */
	bool				claimed;	/* true once the base bdev has been claimed for the module */
	struct spdk_bdev_module		*module;	/* bdev module that owns the parts */
	struct spdk_bdev_fn_table	*fn_table;	/* fn_table shared by every part of this base */
	struct bdev_part_tailq		*tailq;		/* list linking all parts created from this base */
	spdk_io_channel_create_cb	ch_create_cb;	/* optional extra per-channel create hook */
	spdk_io_channel_destroy_cb	ch_destroy_cb;	/* optional extra per-channel destroy hook */
};
59 
/* Accessor: the underlying bdev this part base was constructed on. */
struct spdk_bdev *
spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
{
	return part_base->bdev;
}
65 
/* Accessor: the open descriptor on the underlying bdev (NULL after close). */
struct spdk_bdev_desc *
spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
{
	return part_base->desc;
}
71 
/* Accessor: the tailq holding every part created from this base. */
struct bdev_part_tailq *
spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
{
	return part_base->tailq;
}
77 
/* Accessor: the module-private context supplied at construction time. */
void *
spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
{
	return part_base->ctx;
}
83 
84 void
85 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
86 {
87 	if (base->desc) {
88 		spdk_bdev_close(base->desc);
89 		base->desc = NULL;
90 	}
91 
92 	if (base->base_free_fn != NULL) {
93 		base->base_free_fn(base->ctx);
94 	}
95 
96 	free(base);
97 }
98 
/*
 * io_device unregister callback: final teardown of a single part.
 * Runs after all channels for the part are destroyed.
 */
static void
spdk_bdev_part_free_cb(void *io_device)
{
	struct spdk_bdev_part *part = io_device;
	struct spdk_bdev_part_base *base;

	assert(part);
	assert(part->internal.base);

	base = part->internal.base;

	/* Unlink this part from the base's list. */
	TAILQ_REMOVE(base->tailq, part, tailq);

	/* Drop the base's part refcount; the last part releases the module's
	 * claim on the underlying bdev and frees the base itself.
	 */
	if (__sync_sub_and_fetch(&base->ref, 1) == 0) {
		spdk_bdev_module_release_bdev(base->bdev);
		spdk_bdev_part_base_free(base);
	}

	/* Complete the asynchronous destruct started in spdk_bdev_part_free()
	 * before releasing the part's memory.
	 */
	spdk_bdev_destruct_done(&part->internal.bdev, 0);
	free(part->internal.bdev.name);
	free(part->internal.bdev.product_name);
	free(part);
}
122 
/*
 * Begin destruction of a part. Actual cleanup happens in
 * spdk_bdev_part_free_cb() once the io_device unregister completes.
 */
int
spdk_bdev_part_free(struct spdk_bdev_part *part)
{
	spdk_io_device_unregister(part, spdk_bdev_part_free_cb);

	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
	 * until spdk_bdev_destruct_done is called */
	return 1;
}
132 
133 void
134 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
135 {
136 	struct spdk_bdev_part *part, *tmp;
137 
138 	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
139 		if (part->internal.base == part_base) {
140 			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
141 		}
142 	}
143 }
144 
145 static bool
146 spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
147 {
148 	struct spdk_bdev_part *part = _part;
149 
150 	/* We can't decode/modify passthrough NVMe commands, so don't report
151 	 *  that a partition supports these io types, even if the underlying
152 	 *  bdev does.
153 	 */
154 	switch (io_type) {
155 	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
156 	case SPDK_BDEV_IO_TYPE_NVME_IO:
157 	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
158 		return false;
159 	default:
160 		break;
161 	}
162 
163 	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
164 			io_type);
165 }
166 
/* fn_table get_io_channel: the part itself is the io_device. */
static struct spdk_io_channel *
spdk_bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *part = _part;

	return spdk_get_io_channel(part);
}
174 
/* Accessor: the bdev object that represents this part. */
struct spdk_bdev *
spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
{
	return &part->internal.bdev;
}
180 
/* Accessor: the part base this part was created from. */
struct spdk_bdev_part_base *
spdk_bdev_part_get_base(struct spdk_bdev_part *part)
{
	return part->internal.base;
}
186 
/* Accessor: the underlying bdev this part sits on. */
struct spdk_bdev *
spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
{
	return part->internal.base->bdev;
}
192 
/* Accessor: the part's starting offset within the base bdev, in blocks. */
uint64_t
spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
{
	return part->internal.offset_blocks;
}
198 
/*
 * Remap the DIF reference tags of an I/O from the part-relative block offset
 * to the base-bdev-relative offset.  Needed because the initial reference tag
 * is derived from the LBA, which differs between the part and the base.
 * Returns 0 on success (including when reference tag checking is disabled),
 * otherwise the error code from the DIF library.
 */
static int
spdk_bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
			 uint32_t remapped_offset)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	/* Fast path: nothing to remap unless reference tags are checked. */
	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
		return 0;
	}

	/* Build a DIF context seeded with the part-relative offset. */
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       offset, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return rc;
	}

	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);

	if (bdev->md_interleave) {
		/* Metadata interleaved with data: remap within the data iovecs. */
		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		/* Separate metadata buffer (DIX): remap in the md buffer only. */
		struct iovec md_iov = {
			.iov_base	= bdev_io->u.bdev.md_buf,
			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}
242 
243 static void
244 spdk_bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
245 {
246 	struct spdk_bdev_io *part_io = cb_arg;
247 	uint32_t offset, remapped_offset;
248 	int rc, status;
249 
250 	offset = bdev_io->u.bdev.offset_blocks;
251 	remapped_offset = part_io->u.bdev.offset_blocks;
252 
253 	if (success) {
254 		rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset);
255 		if (rc != 0) {
256 			success = false;
257 		}
258 	}
259 
260 	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
261 
262 	spdk_bdev_io_complete(part_io, status);
263 	spdk_bdev_free_io(bdev_io);
264 }
265 
266 static void
267 spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
268 {
269 	struct spdk_bdev_io *part_io = cb_arg;
270 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
271 
272 	spdk_bdev_io_complete(part_io, status);
273 	spdk_bdev_free_io(bdev_io);
274 }
275 
/*
 * Forward an I/O received on a part to the underlying base bdev, shifting the
 * block offset by the part's starting offset.  Returns 0 on successful
 * submission, a negative errno from the submission call, or
 * SPDK_BDEV_IO_STATUS_FAILED for unsupported/failed requests (callers treat
 * any non-zero return as a failure to submit).
 */
int
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_part *part = ch->part;
	struct spdk_io_channel *base_ch = ch->base_ch;
	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
	uint64_t offset, remapped_offset;
	int rc = 0;

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = offset + part->internal.offset_blocks;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Reads use a dedicated completion so DIF reference tags can be
		 * remapped back to the part-relative offset afterwards.
		 */
		if (bdev_io->u.bdev.md_buf == NULL) {
			rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
						    bdev_io->u.bdev.iovcnt, remapped_offset,
						    bdev_io->u.bdev.num_blocks,
						    spdk_bdev_part_complete_read_io, bdev_io);
		} else {
			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
							    bdev_io->u.bdev.iovs,
							    bdev_io->u.bdev.iovcnt,
							    bdev_io->u.bdev.md_buf, remapped_offset,
							    bdev_io->u.bdev.num_blocks,
							    spdk_bdev_part_complete_read_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* Writes must have their DIF reference tags remapped to the
		 * base-relative offset before the data hits the base bdev.
		 */
		rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset);
		if (rc != 0) {
			return SPDK_BDEV_IO_STATUS_FAILED;
		}

		if (bdev_io->u.bdev.md_buf == NULL) {
			rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
						     bdev_io->u.bdev.iovcnt, remapped_offset,
						     bdev_io->u.bdev.num_blocks,
						     spdk_bdev_part_complete_io, bdev_io);
		} else {
			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
							     bdev_io->u.bdev.iovs,
							     bdev_io->u.bdev.iovcnt,
							     bdev_io->u.bdev.md_buf, remapped_offset,
							     bdev_io->u.bdev.num_blocks,
							     spdk_bdev_part_complete_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
						   bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
						   bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		/* Resets apply to the whole base bdev; no offset to remap. */
		rc = spdk_bdev_reset(base_desc, base_ch,
				     spdk_bdev_part_complete_io, bdev_io);
		break;
	default:
		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
		return SPDK_BDEV_IO_STATUS_FAILED;
	}

	return rc;
}
351 
352 static int
353 spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
354 {
355 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
356 	struct spdk_bdev_part_channel *ch = ctx_buf;
357 
358 	ch->part = part;
359 	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
360 	if (ch->base_ch == NULL) {
361 		return -1;
362 	}
363 
364 	if (part->internal.base->ch_create_cb) {
365 		return part->internal.base->ch_create_cb(io_device, ctx_buf);
366 	} else {
367 		return 0;
368 	}
369 }
370 
371 static void
372 spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
373 {
374 	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
375 	struct spdk_bdev_part_channel *ch = ctx_buf;
376 
377 	if (part->internal.base->ch_destroy_cb) {
378 		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
379 	}
380 	spdk_put_io_channel(ch->base_ch);
381 }
382 
/*
 * Allocate and initialize a part base on top of an existing bdev.
 *
 * Note: this overwrites the get_io_channel and io_type_supported entries in
 * the caller-supplied fn_table, which is then shared by every part.
 *
 * \param bdev underlying bdev to build parts on
 * \param remove_cb hot-remove callback passed to spdk_bdev_open()
 * \param module bdev module that will claim the base bdev
 * \param fn_table fn_table for the parts (mutated here; must stay valid)
 * \param tailq list to link created parts into
 * \param free_fn optional hook invoked when the base is freed
 * \param ctx module-private context for free_fn
 * \param channel_size per-channel ctx size for the parts' io_device
 * \param ch_create_cb / ch_destroy_cb optional per-channel hooks
 * \return the new base, or NULL on allocation or open failure (the base is
 *	   freed internally on the open-failure path)
 */
struct spdk_bdev_part_base *
	spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
			      spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
			      struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
			      spdk_bdev_part_base_free_fn free_fn, void *ctx,
			      uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
			      spdk_io_channel_destroy_cb ch_destroy_cb)
{
	int rc;
	struct spdk_bdev_part_base *base;

	base = calloc(1, sizeof(*base));
	if (!base) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return NULL;
	}
	/* All parts share the generic channel/io-type handlers. */
	fn_table->get_io_channel = spdk_bdev_part_get_io_channel;
	fn_table->io_type_supported = spdk_bdev_part_io_type_supported;

	base->bdev = bdev;
	base->desc = NULL;
	base->ref = 0;
	base->module = module;
	base->fn_table = fn_table;
	base->tailq = tailq;
	base->base_free_fn = free_fn;
	base->ctx = ctx;
	base->claimed = false;
	base->channel_size = channel_size;
	base->ch_create_cb = ch_create_cb;
	base->ch_destroy_cb = ch_destroy_cb;

	/* Open read-only here; write access is gated by the module claim
	 * taken in spdk_bdev_part_construct().
	 */
	rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
	if (rc) {
		spdk_bdev_part_base_free(base);
		SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
			    spdk_strerror(-rc));
		return NULL;
	}

	return base;
}
425 
426 int
427 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
428 			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
429 			 char *product_name)
430 {
431 	part->internal.bdev.blocklen = base->bdev->blocklen;
432 	part->internal.bdev.blockcnt = num_blocks;
433 	part->internal.offset_blocks = offset_blocks;
434 
435 	part->internal.bdev.write_cache = base->bdev->write_cache;
436 	part->internal.bdev.required_alignment = base->bdev->required_alignment;
437 	part->internal.bdev.ctxt = part;
438 	part->internal.bdev.module = base->module;
439 	part->internal.bdev.fn_table = base->fn_table;
440 
441 	part->internal.bdev.md_interleave = base->bdev->md_interleave;
442 	part->internal.bdev.md_len = base->bdev->md_len;
443 	part->internal.bdev.dif_type = base->bdev->dif_type;
444 	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
445 	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
446 
447 	part->internal.bdev.name = strdup(name);
448 	part->internal.bdev.product_name = strdup(product_name);
449 
450 	if (part->internal.bdev.name == NULL) {
451 		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
452 		return -1;
453 	} else if (part->internal.bdev.product_name == NULL) {
454 		free(part->internal.bdev.name);
455 		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
456 			    spdk_bdev_get_name(base->bdev));
457 		return -1;
458 	}
459 
460 	__sync_fetch_and_add(&base->ref, 1);
461 	part->internal.base = base;
462 
463 	if (!base->claimed) {
464 		int rc;
465 
466 		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
467 		if (rc) {
468 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
469 			free(part->internal.bdev.name);
470 			free(part->internal.bdev.product_name);
471 			return -1;
472 		}
473 		base->claimed = true;
474 	}
475 
476 	spdk_io_device_register(part, spdk_bdev_part_channel_create_cb,
477 				spdk_bdev_part_channel_destroy_cb,
478 				base->channel_size,
479 				name);
480 
481 	spdk_bdev_register(&part->internal.bdev);
482 	TAILQ_INSERT_TAIL(base->tailq, part, tailq);
483 
484 	return 0;
485 }
486