xref: /spdk/module/bdev/virtio/bdev_virtio_blk.c (revision 45a053c5777494f4e8ce4bc1191c9de3920377f7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/bdev.h"
9 #include "spdk/endian.h"
10 #include "spdk/env.h"
11 #include "spdk/thread.h"
12 #include "spdk/string.h"
13 #include "spdk/util.h"
14 #include "spdk/json.h"
15 
16 #include "spdk_internal/assert.h"
17 #include "spdk/bdev_module.h"
18 #include "spdk/log.h"
19 #include "spdk_internal/virtio.h"
20 #include "spdk_internal/vhost_user.h"
21 
22 #include <linux/virtio_blk.h>
23 #include <linux/virtio_ids.h>
24 
25 #include "bdev_virtio.h"
26 
/* Per-device context: one virtio transport handle plus the SPDK bdev
 * exposed on top of it. Allocated with calloc() by the *_dev_create()
 * functions and freed in virtio_blk_dev_unregister_cb().
 */
struct virtio_blk_dev {
	struct virtio_dev		vdev;	/* must stay the first member - the io_device
						 * pointer and &bvdev->vdev are used interchangeably */
	struct spdk_bdev		bdev;
	bool				readonly;	/* set when VIRTIO_BLK_F_RO was negotiated */
	bool				unmap;		/* set when VIRTIO_BLK_F_DISCARD was negotiated */
};
33 
/* Per-I/O driver context, carved out of spdk_bdev_io->driver_ctx
 * (see bdev_virtio_blk_get_ctx_size). Holds the virtio request header,
 * the one-byte status response, the optional discard descriptor, and
 * the iovecs that point at them for virtqueue submission.
 */
struct virtio_blk_io_ctx {
	struct iovec				iov_req;	/* wraps `req` below */
	struct iovec				iov_resp;	/* wraps `resp` below */
	struct iovec				iov_unmap;	/* wraps `unmap` below */
	struct virtio_blk_outhdr		req;		/* request header sent to the device */
	struct virtio_blk_discard_write_zeroes	unmap;		/* discard descriptor (UNMAP only) */
	uint8_t					resp;		/* status written back by the device */
};
42 
/* Per-channel state created by bdev_virtio_blk_ch_create_cb(). */
struct bdev_virtio_blk_io_channel {
	/** Device this channel belongs to. */
	struct virtio_dev		*vdev;

	/** Virtqueue exclusively assigned to this channel. */
	struct virtqueue		*vq;

	/** Virtio response poller. */
	struct spdk_poller		*poller;
};
52 
/* Features desired/implemented by this driver. Passed to virtio_dev_reset()
 * by each of the device-creation paths below. */
#define VIRTIO_BLK_DEV_SUPPORTED_FEATURES		\
	(1ULL << VIRTIO_BLK_F_SIZE_MAX		|	\
	 1ULL << VIRTIO_BLK_F_SEG_MAX		|	\
	 1ULL << VIRTIO_BLK_F_BLK_SIZE		|	\
	 1ULL << VIRTIO_BLK_F_TOPOLOGY		|	\
	 1ULL << VIRTIO_BLK_F_MQ		|	\
	 1ULL << VIRTIO_BLK_F_RO		|	\
	 1ULL << VIRTIO_BLK_F_DISCARD		|	\
	 1ULL << VIRTIO_RING_F_EVENT_IDX)
63 
64 /* 10 sec for max poll period */
65 #define VIRTIO_BLK_HOTPLUG_POLL_PERIOD_MAX		10000000ULL
66 /* Default poll period is 100ms */
67 #define VIRTIO_BLK_HOTPLUG_POLL_PERIOD_DEFAULT		100000ULL
68 
/* PCI hotplug monitor state, managed by bdev_virtio_pci_blk_set_hotplug(). */
static struct spdk_poller *g_blk_hotplug_poller = NULL;
/* fd returned by spdk_pci_event_listen(); -1 when hotplug is disabled. */
static int g_blk_hotplug_fd = -1;
71 
72 static int bdev_virtio_initialize(void);
73 static int bdev_virtio_blk_get_ctx_size(void);
74 
/* bdev module descriptor registered below via SPDK_BDEV_MODULE_REGISTER. */
static struct spdk_bdev_module virtio_blk_if = {
	.name = "virtio_blk",
	.module_init = bdev_virtio_initialize,
	.get_ctx_size = bdev_virtio_blk_get_ctx_size,
};
80 
81 SPDK_BDEV_MODULE_REGISTER(virtio_blk, &virtio_blk_if)
82 
83 static int bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf);
84 static void bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf);
85 
86 static struct virtio_blk_io_ctx *
87 bdev_virtio_blk_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
88 {
89 	struct virtio_blk_outhdr *req;
90 	uint8_t *resp;
91 	struct virtio_blk_discard_write_zeroes *desc;
92 
93 	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
94 
95 	req = &io_ctx->req;
96 	resp = &io_ctx->resp;
97 	desc = &io_ctx->unmap;
98 
99 	io_ctx->iov_req.iov_base = req;
100 	io_ctx->iov_req.iov_len = sizeof(*req);
101 
102 	io_ctx->iov_resp.iov_base = resp;
103 	io_ctx->iov_resp.iov_len = sizeof(*resp);
104 
105 	io_ctx->iov_unmap.iov_base = desc;
106 	io_ctx->iov_unmap.iov_len = sizeof(*desc);
107 
108 	memset(req, 0, sizeof(*req));
109 	return io_ctx;
110 }
111 
112 static void
113 bdev_virtio_blk_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
114 {
115 	struct bdev_virtio_blk_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch);
116 	struct virtqueue *vq = virtio_channel->vq;
117 	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
118 	int rc;
119 
120 	rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2);
121 	if (rc == -ENOMEM) {
122 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
123 		return;
124 	} else if (rc != 0) {
125 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
126 		return;
127 	}
128 
129 	virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
130 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
131 		virtqueue_req_add_iovs(vq, &io_ctx->iov_unmap, 1, SPDK_VIRTIO_DESC_RO);
132 	} else {
133 		virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
134 				       bdev_io->type == SPDK_BDEV_IO_TYPE_READ ?
135 				       SPDK_VIRTIO_DESC_WR : SPDK_VIRTIO_DESC_RO);
136 	}
137 	virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
138 
139 	virtqueue_req_flush(vq);
140 }
141 
142 static void
143 bdev_virtio_command(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
144 {
145 	struct virtio_blk_io_ctx *io_ctx = bdev_virtio_blk_init_io_vreq(ch, bdev_io);
146 	struct virtio_blk_outhdr *req = &io_ctx->req;
147 	struct virtio_blk_discard_write_zeroes *desc = &io_ctx->unmap;
148 
149 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
150 		req->type = VIRTIO_BLK_T_IN;
151 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
152 		req->type = VIRTIO_BLK_T_OUT;
153 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
154 		req->type = VIRTIO_BLK_T_DISCARD;
155 		desc->sector = bdev_io->u.bdev.offset_blocks *
156 			       spdk_bdev_get_block_size(bdev_io->bdev) / 512;
157 		desc->num_sectors = bdev_io->u.bdev.num_blocks *
158 				    spdk_bdev_get_block_size(bdev_io->bdev) / 512;
159 		desc->flags = 0;
160 	}
161 
162 	req->sector = bdev_io->u.bdev.offset_blocks *
163 		      spdk_bdev_get_block_size(bdev_io->bdev) / 512;
164 
165 	bdev_virtio_blk_send_io(ch, bdev_io);
166 }
167 
168 static void
169 bdev_virtio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
170 		       bool success)
171 {
172 	if (!success) {
173 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
174 		return;
175 	}
176 
177 	bdev_virtio_command(ch, bdev_io);
178 }
179 
180 static int
181 _bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
182 {
183 	struct virtio_blk_dev *bvdev = bdev_io->bdev->ctxt;
184 
185 	switch (bdev_io->type) {
186 	case SPDK_BDEV_IO_TYPE_READ:
187 		spdk_bdev_io_get_buf(bdev_io, bdev_virtio_get_buf_cb,
188 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
189 		return 0;
190 	case SPDK_BDEV_IO_TYPE_WRITE:
191 		if (bvdev->readonly) {
192 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
193 		} else {
194 			bdev_virtio_command(ch, bdev_io);
195 		}
196 		return 0;
197 	case SPDK_BDEV_IO_TYPE_RESET:
198 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
199 		return 0;
200 	case SPDK_BDEV_IO_TYPE_UNMAP:
201 		if (bvdev->unmap) {
202 			bdev_virtio_command(ch, bdev_io);
203 		} else {
204 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
205 		}
206 		return 0;
207 	case SPDK_BDEV_IO_TYPE_FLUSH:
208 	default:
209 		return -1;
210 	}
211 
212 	SPDK_UNREACHABLE();
213 }
214 
215 static void
216 bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
217 {
218 	if (_bdev_virtio_submit_request(ch, bdev_io) < 0) {
219 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
220 	}
221 }
222 
223 static bool
224 bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
225 {
226 	struct virtio_blk_dev *bvdev = ctx;
227 
228 	switch (io_type) {
229 	case SPDK_BDEV_IO_TYPE_READ:
230 	case SPDK_BDEV_IO_TYPE_RESET:
231 		return true;
232 	case SPDK_BDEV_IO_TYPE_WRITE:
233 		return !bvdev->readonly;
234 	case SPDK_BDEV_IO_TYPE_UNMAP:
235 		return bvdev->unmap;
236 	case SPDK_BDEV_IO_TYPE_FLUSH:
237 	default:
238 		return false;
239 	}
240 }
241 
/* fn_table get_io_channel hook: the virtio_blk_dev pointer itself is
 * the io_device key (registered in virtio_blk_dev_init).
 */
static struct spdk_io_channel *
bdev_virtio_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(ctx);
}
249 
250 static void
251 virtio_blk_dev_unregister_cb(void *io_device)
252 {
253 	struct virtio_blk_dev *bvdev = io_device;
254 	struct virtio_dev *vdev = &bvdev->vdev;
255 
256 	virtio_dev_stop(vdev);
257 	virtio_dev_destruct(vdev);
258 	spdk_bdev_destruct_done(&bvdev->bdev, 0);
259 	free(bvdev);
260 }
261 
262 static int
263 bdev_virtio_disk_destruct(void *ctx)
264 {
265 	struct virtio_blk_dev *bvdev = ctx;
266 
267 	spdk_io_device_unregister(bvdev, virtio_blk_dev_unregister_cb);
268 	return 1;
269 }
270 
/* Public API: hot-remove the named virtio-blk bdev. `cb_fn(cb_arg)` is
 * invoked when unregistration finishes; returns the error from
 * spdk_bdev_unregister_by_name() (e.g. if no such bdev exists).
 */
int
bdev_virtio_blk_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg)
{
	return spdk_bdev_unregister_by_name(name, &virtio_blk_if, cb_fn, cb_arg);
}
276 
277 static int
278 bdev_virtio_dump_json_config(void *ctx, struct spdk_json_write_ctx *w)
279 {
280 	struct virtio_blk_dev *bvdev = ctx;
281 
282 	virtio_dev_dump_json_info(&bvdev->vdev, w);
283 	return 0;
284 }
285 
286 static void
287 bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
288 {
289 	struct virtio_blk_dev *bvdev = bdev->ctxt;
290 
291 	spdk_json_write_object_begin(w);
292 
293 	spdk_json_write_named_string(w, "method", "bdev_virtio_attach_controller");
294 
295 	spdk_json_write_named_object_begin(w, "params");
296 	spdk_json_write_named_string(w, "name", bvdev->vdev.name);
297 	spdk_json_write_named_string(w, "dev_type", "blk");
298 
299 	/* Write transport specific parameters. */
300 	bvdev->vdev.backend_ops->write_json_config(&bvdev->vdev, w);
301 
302 	spdk_json_write_object_end(w);
303 
304 	spdk_json_write_object_end(w);
305 }
306 
/* bdev function table wired into each virtio-blk bdev at init time. */
static const struct spdk_bdev_fn_table virtio_fn_table = {
	.destruct		= bdev_virtio_disk_destruct,
	.submit_request		= bdev_virtio_submit_request,
	.io_type_supported	= bdev_virtio_io_type_supported,
	.get_io_channel		= bdev_virtio_get_io_channel,
	.dump_info_json		= bdev_virtio_dump_json_config,
	.write_config_json	= bdev_virtio_write_config_json,
};
315 
316 static void
317 bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io)
318 {
319 	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
320 
321 	spdk_bdev_io_complete(bdev_io, io_ctx->resp == VIRTIO_BLK_S_OK ?
322 			      SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
323 }
324 
325 static int
326 bdev_virtio_poll(void *arg)
327 {
328 	struct bdev_virtio_blk_io_channel *ch = arg;
329 	void *io[32];
330 	uint32_t io_len[32];
331 	uint16_t i, cnt;
332 
333 	cnt = virtio_recv_pkts(ch->vq, io, io_len, SPDK_COUNTOF(io));
334 	for (i = 0; i < cnt; ++i) {
335 		bdev_virtio_io_cpl(io[i]);
336 	}
337 
338 	return cnt;
339 }
340 
341 static int
342 bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf)
343 {
344 	struct virtio_blk_dev *bvdev = io_device;
345 	struct virtio_dev *vdev = &bvdev->vdev;
346 	struct bdev_virtio_blk_io_channel *ch = ctx_buf;
347 	struct virtqueue *vq;
348 	int32_t queue_idx;
349 
350 	queue_idx = virtio_dev_find_and_acquire_queue(vdev, 0);
351 	if (queue_idx < 0) {
352 		SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n");
353 		return -1;
354 	}
355 
356 	vq = vdev->vqs[queue_idx];
357 
358 	ch->vdev = vdev;
359 	ch->vq = vq;
360 
361 	ch->poller = SPDK_POLLER_REGISTER(bdev_virtio_poll, ch, 0);
362 	return 0;
363 }
364 
365 static void
366 bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf)
367 {
368 	struct virtio_blk_dev *bvdev = io_device;
369 	struct virtio_dev *vdev = &bvdev->vdev;
370 	struct bdev_virtio_blk_io_channel *ch = ctx_buf;
371 	struct virtqueue *vq = ch->vq;
372 
373 	spdk_poller_unregister(&ch->poller);
374 	virtio_dev_release_queue(vdev, vq->vq_queue_index);
375 }
376 
/* Read the device config space, fill in the spdk_bdev fields, start the
 * virtio device and register the bdev. On success the bdev/io_device are
 * live; on failure the caller still owns (and must destruct/free) bvdev.
 *
 * `max_queues` is the number of request queues the caller wants; it is
 * clamped to the count advertised by the host (VIRTIO_BLK_F_MQ).
 *
 * Returns 0 on success, negative errno on failure.
 */
static int
virtio_blk_dev_init(struct virtio_blk_dev *bvdev, uint16_t max_queues)
{
	struct virtio_dev *vdev = &bvdev->vdev;
	struct spdk_bdev *bdev = &bvdev->bdev;
	uint64_t capacity, num_blocks;
	uint32_t block_size, size_max, seg_max;
	uint16_t host_max_queues;
	int rc;

	/* Logical block size: from config space if negotiated, else the
	 * virtio-blk default of 512 bytes. */
	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, blk_size),
						&block_size, sizeof(block_size));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (block_size == 0 || block_size % 512 != 0) {
			SPDK_ERRLOG("%s: invalid block size (%"PRIu32"). Must be "
				    "a multiple of 512.\n", vdev->name, block_size);
			return -EIO;
		}
	} else {
		block_size = 512;
	}

	rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, capacity),
					&capacity, sizeof(capacity));
	if (rc) {
		SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
		return rc;
	}

	/* `capacity` is a number of 512-byte sectors. */
	num_blocks = capacity * 512 / block_size;
	if (num_blocks == 0) {
		SPDK_ERRLOG("%s: size too small (size: %"PRIu64", blocksize: %"PRIu32").\n",
			    vdev->name, capacity * 512, block_size);
		return -EIO;
	}

	if ((capacity * 512) % block_size != 0) {
		SPDK_WARNLOG("%s: size has been rounded down to the nearest block size boundary. "
			     "(block size: %"PRIu32", previous size: %"PRIu64", new size: %"PRIu64")\n",
			     vdev->name, block_size, capacity * 512, num_blocks * block_size);
	}

	/* Number of request queues the host supports (1 without MQ). */
	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
						&host_max_queues, sizeof(host_max_queues));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}
	} else {
		host_max_queues = 1;
	}

	/* Per-segment size limit, if the host advertises one. */
	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_SIZE_MAX)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, size_max),
						&size_max, sizeof(size_max));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (spdk_unlikely(size_max < block_size)) {
			SPDK_WARNLOG("%s: minimum segment size is set to block size %u forcefully.\n",
				     vdev->name, block_size);
			size_max = block_size;
		}

		bdev->max_segment_size = size_max;
	}

	/* Per-request segment count limit, if the host advertises one. */
	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_SEG_MAX)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, seg_max),
						&seg_max, sizeof(seg_max));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (spdk_unlikely(seg_max == 0)) {
			SPDK_ERRLOG("%s: virtio blk SEG_MAX can't be 0\n", vdev->name);
			return -EINVAL;
		}

		bdev->max_num_segments = seg_max;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_RO)) {
		bvdev->readonly = true;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		bvdev->unmap = true;
	}

	if (max_queues == 0) {
		SPDK_ERRLOG("%s: requested 0 request queues (%"PRIu16" available).\n",
			    vdev->name, host_max_queues);
		return -EINVAL;
	}

	if (max_queues > host_max_queues) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues "
			     "but only %"PRIu16" available.\n",
			     vdev->name, max_queues, host_max_queues);
		max_queues = host_max_queues;
	}

	/* bdev is tied with the virtio device; we can reuse the name */
	bdev->name = vdev->name;
	rc = virtio_dev_start(vdev, max_queues, 0);
	if (rc != 0) {
		return rc;
	}

	bdev->product_name = "VirtioBlk Disk";
	bdev->write_cache = 0;
	bdev->blocklen = block_size;
	bdev->blockcnt = num_blocks;

	bdev->ctxt = bvdev;
	bdev->fn_table = &virtio_fn_table;
	bdev->module = &virtio_blk_if;

	/* The virtio_blk_dev pointer doubles as the io_device key used by
	 * bdev_virtio_get_io_channel(). */
	spdk_io_device_register(bvdev, bdev_virtio_blk_ch_create_cb,
				bdev_virtio_blk_ch_destroy_cb,
				sizeof(struct bdev_virtio_blk_io_channel),
				vdev->name);

	rc = spdk_bdev_register(bdev);
	if (rc) {
		SPDK_ERRLOG("Failed to register bdev name=%s\n", bdev->name);
		spdk_io_device_unregister(bvdev, NULL);
		virtio_dev_stop(vdev);
		return rc;
	}

	return 0;
}
521 
522 static struct virtio_blk_dev *
523 virtio_pci_blk_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx)
524 {
525 	static int pci_dev_counter = 0;
526 	struct virtio_blk_dev *bvdev;
527 	struct virtio_dev *vdev;
528 	char *default_name = NULL;
529 	uint16_t num_queues;
530 	int rc;
531 
532 	bvdev = calloc(1, sizeof(*bvdev));
533 	if (bvdev == NULL) {
534 		SPDK_ERRLOG("virtio device calloc failed\n");
535 		return NULL;
536 	}
537 	vdev = &bvdev->vdev;
538 
539 	if (name == NULL) {
540 		default_name = spdk_sprintf_alloc("VirtioBlk%"PRIu32, pci_dev_counter++);
541 		if (default_name == NULL) {
542 			free(vdev);
543 			return NULL;
544 		}
545 		name = default_name;
546 	}
547 
548 	rc = virtio_pci_dev_init(vdev, name, pci_ctx);
549 	free(default_name);
550 
551 	if (rc != 0) {
552 		free(bvdev);
553 		return NULL;
554 	}
555 
556 	rc = virtio_dev_reset(vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
557 	if (rc != 0) {
558 		goto fail;
559 	}
560 
561 	/* TODO: add a way to limit usable virtqueues */
562 	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
563 		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
564 						&num_queues, sizeof(num_queues));
565 		if (rc) {
566 			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
567 			goto fail;
568 		}
569 	} else {
570 		num_queues = 1;
571 	}
572 
573 	rc = virtio_blk_dev_init(bvdev, num_queues);
574 	if (rc != 0) {
575 		goto fail;
576 	}
577 
578 	return bvdev;
579 
580 fail:
581 	vdev->ctx = NULL;
582 	virtio_dev_destruct(vdev);
583 	free(bvdev);
584 	return NULL;
585 }
586 
587 static struct virtio_blk_dev *
588 virtio_user_blk_dev_create(const char *name, const char *path,
589 			   uint16_t num_queues, uint32_t queue_size)
590 {
591 	struct virtio_blk_dev *bvdev;
592 	uint64_t feature_bits;
593 	int rc;
594 
595 	bvdev = calloc(1, sizeof(*bvdev));
596 	if (bvdev == NULL) {
597 		SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path);
598 		return NULL;
599 	}
600 
601 	rc = virtio_user_dev_init(&bvdev->vdev, name, path, queue_size);
602 	if (rc != 0) {
603 		SPDK_ERRLOG("Failed to create virito device %s: %s\n", name, path);
604 		free(bvdev);
605 		return NULL;
606 	}
607 
608 	feature_bits = VIRTIO_BLK_DEV_SUPPORTED_FEATURES;
609 	feature_bits |= (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
610 	rc = virtio_dev_reset(&bvdev->vdev, feature_bits);
611 	if (rc != 0) {
612 		virtio_dev_destruct(&bvdev->vdev);
613 		free(bvdev);
614 		return NULL;
615 	}
616 
617 	rc = virtio_blk_dev_init(bvdev, num_queues);
618 	if (rc != 0) {
619 		virtio_dev_destruct(&bvdev->vdev);
620 		free(bvdev);
621 		return NULL;
622 	}
623 
624 	return bvdev;
625 }
626 
/* Context passed through virtio_pci_dev_attach()/enumerate() callbacks:
 * carries the requested name in and the created device out. */
struct bdev_virtio_pci_dev_create_ctx {
	const char *name;		/* desired bdev name; NULL means auto-generate */
	struct virtio_blk_dev *ret;	/* created device, or NULL on failure */
};
631 
632 static int
633 bdev_virtio_pci_blk_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
634 {
635 	struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx;
636 
637 	create_ctx->ret = virtio_pci_blk_dev_create(create_ctx->name, pci_ctx);
638 	if (create_ctx->ret == NULL) {
639 		return -1;
640 	}
641 
642 	return 0;
643 }
644 
645 struct spdk_bdev *
646 bdev_virtio_pci_blk_dev_create(const char *name, struct spdk_pci_addr *pci_addr)
647 {
648 	struct bdev_virtio_pci_dev_create_ctx create_ctx;
649 
650 	create_ctx.name = name;
651 	create_ctx.ret = NULL;
652 
653 	virtio_pci_dev_attach(bdev_virtio_pci_blk_dev_create_cb, &create_ctx,
654 			      VIRTIO_ID_BLOCK, pci_addr);
655 
656 	if (create_ctx.ret == NULL) {
657 		return NULL;
658 	}
659 
660 	return &create_ctx.ret->bdev;
661 }
662 
663 static int
664 bdev_virtio_pci_blk_monitor(void *arg)
665 {
666 	const char *vdev_name;
667 	struct bdev_virtio_pci_dev_create_ctx create_ctx;
668 
669 	while ((vdev_name = virtio_pci_dev_event_process(g_blk_hotplug_fd, VIRTIO_ID_BLOCK)) != NULL) {
670 		bdev_virtio_blk_dev_remove(vdev_name, NULL, NULL);
671 	}
672 
673 	/* Enumerate virtio pci_blk device */
674 	memset(&create_ctx, 0, sizeof(create_ctx));
675 	virtio_pci_dev_enumerate(bdev_virtio_pci_blk_dev_create_cb, &create_ctx,
676 				 VIRTIO_ID_BLOCK);
677 
678 	return SPDK_POLLER_BUSY;
679 }
680 
681 int
682 bdev_virtio_pci_blk_set_hotplug(bool enabled, uint64_t period_us)
683 {
684 	if (enabled == true && !spdk_process_is_primary()) {
685 		return -EPERM;
686 	}
687 
688 	if (g_blk_hotplug_poller) {
689 		close(g_blk_hotplug_fd);
690 		spdk_poller_unregister(&g_blk_hotplug_poller);
691 	}
692 
693 	if (!enabled) {
694 		return 0;
695 	}
696 
697 	g_blk_hotplug_fd = spdk_pci_event_listen();
698 	if (g_blk_hotplug_fd < 0) {
699 		return g_blk_hotplug_fd;
700 	}
701 
702 	period_us = period_us ? period_us : VIRTIO_BLK_HOTPLUG_POLL_PERIOD_DEFAULT;
703 	period_us = spdk_min(period_us, VIRTIO_BLK_HOTPLUG_POLL_PERIOD_MAX);
704 	g_blk_hotplug_poller = spdk_poller_register(bdev_virtio_pci_blk_monitor, NULL, period_us);
705 	if (!g_blk_hotplug_poller) {
706 		close(g_blk_hotplug_fd);
707 		return -1;
708 	}
709 
710 	return 0;
711 }
712 
/* Module init hook: virtio-blk has no global state to set up. */
static int
bdev_virtio_initialize(void)
{
	return 0;
}
718 
719 struct spdk_bdev *
720 bdev_virtio_user_blk_dev_create(const char *name, const char *path,
721 				unsigned num_queues, unsigned queue_size)
722 {
723 	struct virtio_blk_dev *bvdev;
724 
725 	bvdev = virtio_user_blk_dev_create(name, path, num_queues, queue_size);
726 	if (bvdev == NULL) {
727 		return NULL;
728 	}
729 
730 	return &bvdev->bdev;
731 }
732 
733 struct spdk_bdev *
734 bdev_virtio_vfio_user_blk_dev_create(const char *name, const char *path)
735 {
736 	struct virtio_blk_dev *bvdev;
737 	uint16_t num_queues = 0;
738 	int rc;
739 
740 	bvdev = calloc(1, sizeof(*bvdev));
741 	if (bvdev == NULL) {
742 		SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path);
743 		return NULL;
744 	}
745 
746 	rc = virtio_vfio_user_dev_init(&bvdev->vdev, name, path);
747 	if (rc != 0) {
748 		SPDK_ERRLOG("Failed to create %s as virtio device\n", path);
749 		free(bvdev);
750 		return NULL;
751 	}
752 
753 	rc = virtio_dev_reset(&bvdev->vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
754 	if (rc != 0) {
755 		SPDK_ERRLOG("Failed to reset %s as virtio device\n", path);
756 		virtio_dev_destruct(&bvdev->vdev);
757 		free(bvdev);
758 		return NULL;
759 	}
760 
761 	if (virtio_dev_has_feature(&bvdev->vdev, VIRTIO_BLK_F_MQ)) {
762 		rc = virtio_dev_read_dev_config(&bvdev->vdev, offsetof(struct virtio_blk_config, num_queues),
763 						&num_queues, sizeof(num_queues));
764 		if (rc) {
765 			SPDK_ERRLOG("%s: config read failed: %s\n", name, spdk_strerror(-rc));
766 			virtio_dev_destruct(&bvdev->vdev);
767 			free(bvdev);
768 			return NULL;
769 		}
770 	} else {
771 		num_queues = 1;
772 	}
773 
774 	rc = virtio_blk_dev_init(bvdev, num_queues);
775 	if (rc != 0) {
776 		SPDK_ERRLOG("Failed to initialize %s as virtio device\n", path);
777 		virtio_dev_destruct(&bvdev->vdev);
778 		free(bvdev);
779 		return NULL;
780 	}
781 
782 	return &bvdev->bdev;
783 }
784 
/* Module get_ctx_size hook: size of the per-I/O context the bdev layer
 * must reserve in spdk_bdev_io->driver_ctx for this module. */
static int
bdev_virtio_blk_get_ctx_size(void)
{
	return sizeof(struct virtio_blk_io_ctx);
}
790 
791 SPDK_LOG_REGISTER_COMPONENT(virtio_blk)
792