/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/blob_bdev.h"
#include "spdk/blob.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/endian.h"
#define __SPDK_BDEV_MODULE_ONLY
#include "spdk/bdev_module.h"

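/*
 * An spdk_bs_dev backed by a bdev. The embedded bs_dev must remain the first
 * member so that the spdk_bs_dev pointer handed to blobstore can be cast back
 * to the containing blob_bdev.
 */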
struct blob_bdev {
	struct spdk_bs_dev	bs_dev;
	struct spdk_bdev	*bdev;
	struct spdk_bdev_desc	*desc;
	bool			write;
	int32_t			refs;
	struct spdk_spinlock	lock;
};

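/*
 * Captures the parameters of an I/O that failed with -ENOMEM so that it can be
 * reissued by bdev_blob_resubmit() when the bdev layer has resources again.
 */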
struct blob_resubmit {
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	enum spdk_bdev_io_type io_type;
	struct spdk_bs_dev *dev;
	struct spdk_io_channel *channel;
	void *payload;
	int iovcnt;
	uint64_t lba;
	uint64_t src_lba;
	uint32_t lba_count;
	struct spdk_bs_dev_cb_args *cb_args;
	struct spdk_blob_ext_io_opts *ext_io_opts;
};
static void bdev_blob_resubmit(void *);

static inline struct spdk_bdev_desc *
__get_desc(struct spdk_bs_dev *dev)
{
	return ((struct blob_bdev *)dev)->desc;
}

static inline struct spdk_bdev *
__get_bdev(struct spdk_bs_dev *dev)
{
	return ((struct blob_bdev *)dev)->bdev;
}

static void
bdev_blob_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg)
{
	struct spdk_bs_dev_cb_args *cb_args = arg;
	int bserrno;

	if (success) {
		bserrno = 0;
	} else {
		bserrno = -EIO;
	}
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno);
	spdk_bdev_free_io(bdev_io);
}

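/*
 * Queue an I/O that failed with -ENOMEM to be retried (via bdev_blob_resubmit())
 * once the bdev layer indicates that resources may be available again.
 */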
static void
bdev_blob_queue_io(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		   int iovcnt, uint64_t lba, uint64_t src_lba, uint32_t lba_count,
		   enum spdk_bdev_io_type io_type, struct spdk_bs_dev_cb_args *cb_args,
		   struct spdk_blob_ext_io_opts *ext_io_opts)
{
	int rc;
	struct spdk_bdev *bdev = __get_bdev(dev);
	struct blob_resubmit *ctx;

	ctx = calloc(1, sizeof(struct blob_resubmit));

	if (ctx == NULL) {
		SPDK_ERRLOG("Not enough memory to queue io\n");
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -ENOMEM);
		return;
	}

	ctx->io_type = io_type;
	ctx->dev = dev;
	ctx->channel = channel;
	ctx->payload = payload;
	ctx->iovcnt = iovcnt;
	ctx->lba = lba;
	ctx->src_lba = src_lba;
	ctx->lba_count = lba_count;
	ctx->cb_args = cb_args;
	ctx->bdev_io_wait.bdev = bdev;
	ctx->bdev_io_wait.cb_fn = bdev_blob_resubmit;
	ctx->bdev_io_wait.cb_arg = ctx;
	ctx->ext_io_opts = ext_io_opts;

	rc = spdk_bdev_queue_io_wait(bdev, channel, &ctx->bdev_io_wait);
	if (rc != 0) {
		SPDK_ERRLOG("Queue io failed, rc=%d\n", rc);
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
		free(ctx);
		assert(false);
	}
}

static void
bdev_blob_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
	       uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_read_blocks(__get_desc(dev), channel, payload, lba,
				   lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, payload, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_write_blocks(__get_desc(dev), channel, payload, lba,
				    lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, payload, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		struct iovec *iov, int iovcnt,
		uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_readv_blocks(__get_desc(dev), channel, iov, iovcnt, lba,
				    lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		 struct iovec *iov, int iovcnt,
		 uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_writev_blocks(__get_desc(dev), channel, iov, iovcnt, lba,
				     lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

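/*
 * Translate blobstore extended I/O options into the bdev layer's extended I/O
 * options. A plain readv/writev that was requeued after -ENOMEM is resubmitted
 * through the *_ext path with src == NULL, so that case must be tolerated.
 */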
static inline void
blob_ext_io_opts_to_bdev_opts(struct spdk_bdev_ext_io_opts *dst, struct spdk_blob_ext_io_opts *src)
{
	memset(dst, 0, sizeof(*dst));
	dst->size = sizeof(*dst);
	if (src != NULL) {
		dst->memory_domain = src->memory_domain;
		dst->memory_domain_ctx = src->memory_domain_ctx;
	}
}

static void
bdev_blob_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		    struct iovec *iov, int iovcnt,
		    uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args,
		    struct spdk_blob_ext_io_opts *io_opts)
{
	struct spdk_bdev_ext_io_opts bdev_io_opts;
	int rc;

	blob_ext_io_opts_to_bdev_opts(&bdev_io_opts, io_opts);
	rc = spdk_bdev_readv_blocks_ext(__get_desc(dev), channel, iov, iovcnt, lba, lba_count,
					bdev_blob_io_complete, cb_args, &bdev_io_opts);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0, lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args,
				   io_opts);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_writev_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		     struct iovec *iov, int iovcnt,
		     uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args,
		     struct spdk_blob_ext_io_opts *io_opts)
{
	struct spdk_bdev_ext_io_opts bdev_io_opts;
	int rc;

	blob_ext_io_opts_to_bdev_opts(&bdev_io_opts, io_opts);
	rc = spdk_bdev_writev_blocks_ext(__get_desc(dev), channel, iov, iovcnt, lba, lba_count,
					 bdev_blob_io_complete, cb_args, &bdev_io_opts);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0, lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args,
				   io_opts);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
		       uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_write_zeroes_blocks(__get_desc(dev), channel, lba,
					   lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, NULL, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE_ZEROES, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
		uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	int rc;

	if (spdk_bdev_io_type_supported(blob_bdev->bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
		rc = spdk_bdev_unmap_blocks(__get_desc(dev), channel, lba, lba_count,
					    bdev_blob_io_complete, cb_args);
		if (rc == -ENOMEM) {
			bdev_blob_queue_io(dev, channel, NULL, 0, lba, 0,
					   lba_count, SPDK_BDEV_IO_TYPE_UNMAP, cb_args, NULL);
		} else if (rc != 0) {
			cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
		}
	} else {
		/*
		 * If the device doesn't support unmap, immediately complete
		 * the request. Blobstore does not rely on unmap zeroing
		 * data.
		 */
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0);
	}
}

static void
bdev_blob_copy(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
	       uint64_t dst_lba, uint64_t src_lba, uint64_t lba_count,
	       struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_copy_blocks(__get_desc(dev), channel,
				   dst_lba, src_lba, lba_count,
				   bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, NULL, 0, dst_lba, src_lba,
				   lba_count, SPDK_BDEV_IO_TYPE_COPY, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

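/*
 * spdk_bdev_queue_io_wait() callback: reissue the I/O captured in the
 * blob_resubmit context, then free the context.
 */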
static void
bdev_blob_resubmit(void *arg)
{
	struct blob_resubmit *ctx = (struct blob_resubmit *) arg;

	switch (ctx->io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (ctx->iovcnt > 0) {
			bdev_blob_readv_ext(ctx->dev, ctx->channel, (struct iovec *) ctx->payload, ctx->iovcnt,
					    ctx->lba, ctx->lba_count, ctx->cb_args, ctx->ext_io_opts);
		} else {
			bdev_blob_read(ctx->dev, ctx->channel, ctx->payload,
				       ctx->lba, ctx->lba_count, ctx->cb_args);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (ctx->iovcnt > 0) {
			bdev_blob_writev_ext(ctx->dev, ctx->channel, (struct iovec *) ctx->payload, ctx->iovcnt,
					     ctx->lba, ctx->lba_count, ctx->cb_args, ctx->ext_io_opts);
		} else {
			bdev_blob_write(ctx->dev, ctx->channel, ctx->payload,
					ctx->lba, ctx->lba_count, ctx->cb_args);
		}
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		bdev_blob_unmap(ctx->dev, ctx->channel,
				ctx->lba, ctx->lba_count, ctx->cb_args);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		bdev_blob_write_zeroes(ctx->dev, ctx->channel,
				       ctx->lba, ctx->lba_count, ctx->cb_args);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		bdev_blob_copy(ctx->dev, ctx->channel,
			       ctx->lba, ctx->src_lba, ctx->lba_count, ctx->cb_args);
		break;
	default:
		SPDK_ERRLOG("Unsupported io type %d\n", ctx->io_type);
		assert(false);
		break;
	}
	free(ctx);
}

int
spdk_bs_bdev_claim(struct spdk_bs_dev *bs_dev, struct spdk_bdev_module *module)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;
	struct spdk_bdev_desc *desc = blob_bdev->desc;
	enum spdk_bdev_claim_type claim_type;
	int rc;

	claim_type = blob_bdev->write ? SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE :
		     SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
	rc = spdk_bdev_module_claim_bdev_desc(desc, claim_type, NULL, module);
	if (rc != 0) {
		SPDK_ERRLOG("could not claim bs dev\n");
		return rc;
	}

	return 0;
}

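/*
 * Illustrative sketch (not part of this module): a caller that wants exclusive
 * write access uses spdk_bs_bdev_claim() right after creating the bs_dev.
 * "g_my_module" is a hypothetical caller-owned spdk_bdev_module.
 *
 *	rc = spdk_bs_bdev_claim(bs_dev, &g_my_module);
 *	if (rc != 0) {
 *		bs_dev->destroy(bs_dev);
 *		return rc;
 *	}
 */
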
static struct spdk_io_channel *
bdev_blob_create_channel(struct spdk_bs_dev *dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	struct spdk_io_channel *ch;

	ch = spdk_bdev_get_io_channel(blob_bdev->desc);
	if (ch != NULL) {
		spdk_spin_lock(&blob_bdev->lock);
		blob_bdev->refs++;
		spdk_spin_unlock(&blob_bdev->lock);
	}

	return ch;
}

static void
bdev_blob_free(struct blob_bdev *blob_bdev)
{
	assert(blob_bdev->refs == 0);

	spdk_spin_destroy(&blob_bdev->lock);
	free(blob_bdev);
}

static void
bdev_blob_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	assert(blob_bdev->refs > 0);
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_put_io_channel(channel);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->lock is zero,
	 * dev->destroy() has already been called and this was the last remaining channel.
	 * Dereferencing dev after it has been destroyed would be an error (akin to use after
	 * free), so there can be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was read while holding the lock here and the
	 * same is done in bdev_blob_destroy(), there is no race with bdev_blob_destroy().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static void
bdev_blob_destroy(struct spdk_bs_dev *bs_dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;
	struct spdk_bdev_desc *desc;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	desc = blob_bdev->desc;
	blob_bdev->desc = NULL;
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_bdev_close(desc);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->lock is zero,
	 * bs_dev->destroy() has been called and all of the channels have been destroyed.
	 * Dereferencing bs_dev after it has been destroyed would be an error (akin to use
	 * after free), so there can be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was read while holding the lock here and the
	 * same is done in bdev_blob_destroy_channel(), there is no race with
	 * bdev_blob_destroy_channel().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static struct spdk_bdev *
bdev_blob_get_base_bdev(struct spdk_bs_dev *bs_dev)
{
	return __get_bdev(bs_dev);
}

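/* Conservatively report that no range is known to read back as zeroes. */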
static bool
bdev_blob_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
	return false;
}

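/* This bs_dev sits directly on the bdev, so LBAs map through one-to-one. */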
static bool
bdev_blob_translate_lba(struct spdk_bs_dev *dev, uint64_t lba, uint64_t *base_lba)
{
	*base_lba = lba;
	return true;
}

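/*
 * Wire up the spdk_bs_dev function table and geometry from the opened bdev.
 * The copy callback is registered only when the underlying bdev supports
 * SPDK_BDEV_IO_TYPE_COPY; leaving it NULL keeps blobstore from issuing copy
 * I/O to a bdev that cannot handle it.
 */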
static void
blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_desc_get_bdev(desc);
	assert(bdev != NULL);

	b->bdev = bdev;
	b->desc = desc;
	b->bs_dev.blockcnt = spdk_bdev_get_num_blocks(bdev);
	b->bs_dev.blocklen = spdk_bdev_get_block_size(bdev);
	b->bs_dev.create_channel = bdev_blob_create_channel;
	b->bs_dev.destroy_channel = bdev_blob_destroy_channel;
	b->bs_dev.destroy = bdev_blob_destroy;
	b->bs_dev.read = bdev_blob_read;
	b->bs_dev.write = bdev_blob_write;
	b->bs_dev.readv = bdev_blob_readv;
	b->bs_dev.writev = bdev_blob_writev;
	b->bs_dev.readv_ext = bdev_blob_readv_ext;
	b->bs_dev.writev_ext = bdev_blob_writev_ext;
	b->bs_dev.write_zeroes = bdev_blob_write_zeroes;
	b->bs_dev.unmap = bdev_blob_unmap;
	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY)) {
		b->bs_dev.copy = bdev_blob_copy;
	}
	b->bs_dev.get_base_bdev = bdev_blob_get_base_bdev;
	b->bs_dev.is_zeroes = bdev_blob_is_zeroes;
	b->bs_dev.translate_lba = bdev_blob_translate_lba;
}

void
spdk_bdev_update_bs_blockcnt(struct spdk_bs_dev *bs_dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;

	assert(bs_dev->blocklen == spdk_bdev_get_block_size(blob_bdev->bdev));
	bs_dev->blockcnt = spdk_bdev_get_num_blocks(blob_bdev->bdev);
}

int
spdk_bdev_create_bs_dev(const char *bdev_name, bool write,
			struct spdk_bdev_bs_dev_opts *opts, size_t opts_size,
			spdk_bdev_event_cb_t event_cb, void *event_ctx,
			struct spdk_bs_dev **bs_dev)
{
	struct blob_bdev *b;
	struct spdk_bdev_desc *desc;
	int rc;

	assert(spdk_get_thread() != NULL);

	if (opts != NULL && opts_size != sizeof(*opts)) {
		SPDK_ERRLOG("bdev name '%s': unsupported options\n", bdev_name);
		return -EINVAL;
	}

	b = calloc(1, sizeof(*b));

	if (b == NULL) {
		SPDK_ERRLOG("could not allocate blob_bdev\n");
		return -ENOMEM;
	}

	rc = spdk_bdev_open_ext(bdev_name, write, event_cb, event_ctx, &desc);
	if (rc != 0) {
		free(b);
		return rc;
	}

	blob_bdev_init(b, desc);

	*bs_dev = &b->bs_dev;
	b->write = write;
	b->refs = 1;
	spdk_spin_init(&b->lock);

	return 0;
}

int
spdk_bdev_create_bs_dev_ext(const char *bdev_name, spdk_bdev_event_cb_t event_cb,
			    void *event_ctx, struct spdk_bs_dev **bs_dev)
{
	return spdk_bdev_create_bs_dev(bdev_name, true, NULL, 0, event_cb, event_ctx, bs_dev);
}
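
/*
 * Illustrative sketch (not part of this module): creating a bs_dev on top of
 * an existing bdev and handing it to blobstore. "Malloc0" is a placeholder
 * bdev name, and "base_bdev_event_cb"/"bs_load_done" are hypothetical
 * caller-provided callbacks.
 *
 *	struct spdk_bs_dev *bs_dev;
 *	int rc;
 *
 *	rc = spdk_bdev_create_bs_dev_ext("Malloc0", base_bdev_event_cb, NULL, &bs_dev);
 *	if (rc == 0) {
 *		spdk_bs_load(bs_dev, NULL, bs_load_done, NULL);
 *	}
 */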