/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/json.h"

#include "spdk_internal/assert.h"
#include "spdk/bdev_module.h"
#include "spdk/log.h"
#include "spdk_internal/virtio.h"
#include "spdk_internal/vhost_user.h"

#include <linux/virtio_blk.h>
#include <linux/virtio_ids.h>

#include "bdev_virtio.h"

struct virtio_blk_dev {
	struct virtio_dev		vdev;
	struct spdk_bdev		bdev;
	bool				readonly;
	bool				unmap;
};

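/* Per-I/O context carved out of each spdk_bdev_io's driver_ctx area
 * (see bdev_virtio_blk_get_ctx_size). It holds the virtio-blk request
 * header, the one-byte status response and the discard descriptor,
 * plus the iovecs used to map them into the virtqueue.
 */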
struct virtio_blk_io_ctx {
	struct iovec				iov_req;
	struct iovec				iov_resp;
	struct iovec				iov_unmap;
	struct virtio_blk_outhdr		req;
	struct virtio_blk_discard_write_zeroes	unmap;
	uint8_t					resp;
};

struct bdev_virtio_blk_io_channel {
	struct virtio_dev		*vdev;

	/** Virtqueue exclusively assigned to this channel. */
	struct virtqueue		*vq;

	/** Virtio response poller. */
	struct spdk_poller		*poller;
};

/* Features desired/implemented by this driver. */
#define VIRTIO_BLK_DEV_SUPPORTED_FEATURES		\
	(1ULL << VIRTIO_BLK_F_SIZE_MAX		|	\
	 1ULL << VIRTIO_BLK_F_SEG_MAX		|	\
	 1ULL << VIRTIO_BLK_F_BLK_SIZE		|	\
	 1ULL << VIRTIO_BLK_F_TOPOLOGY		|	\
	 1ULL << VIRTIO_BLK_F_MQ		|	\
	 1ULL << VIRTIO_BLK_F_RO		|	\
	 1ULL << VIRTIO_BLK_F_DISCARD		|	\
	 1ULL << VIRTIO_RING_F_EVENT_IDX	|	\
	 1ULL << VHOST_USER_F_PROTOCOL_FEATURES)

/* Maximum hotplug poll period: 10 seconds */
#define VIRTIO_BLK_HOTPLUG_POLL_PERIOD_MAX		10000000ULL
/* Default hotplug poll period: 100 ms */
#define VIRTIO_BLK_HOTPLUG_POLL_PERIOD_DEFAULT		100000ULL

static struct spdk_poller *g_blk_hotplug_poller = NULL;
static int g_blk_hotplug_fd = -1;

static int bdev_virtio_initialize(void);
static int bdev_virtio_blk_get_ctx_size(void);

static struct spdk_bdev_module virtio_blk_if = {
	.name = "virtio_blk",
	.module_init = bdev_virtio_initialize,
	.get_ctx_size = bdev_virtio_blk_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(virtio_blk, &virtio_blk_if)

static int bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf);
static void bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf);

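/* Point the request/response/unmap iovecs at the buffers embedded in the
 * per-I/O context and zero the request header before it is filled in.
 */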
static struct virtio_blk_io_ctx *
bdev_virtio_blk_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct virtio_blk_outhdr *req;
	uint8_t *resp;
	struct virtio_blk_discard_write_zeroes *desc;

	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;

	req = &io_ctx->req;
	resp = &io_ctx->resp;
	desc = &io_ctx->unmap;

	io_ctx->iov_req.iov_base = req;
	io_ctx->iov_req.iov_len = sizeof(*req);

	io_ctx->iov_resp.iov_base = resp;
	io_ctx->iov_resp.iov_len = sizeof(*resp);

	io_ctx->iov_unmap.iov_base = desc;
	io_ctx->iov_unmap.iov_len = sizeof(*desc);

	memset(req, 0, sizeof(*req));
	return io_ctx;
}

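/* Build the descriptor chain for a single I/O: request header (read-only),
 * then either the data iovecs or the discard descriptor, then the one-byte
 * status (write-only), and notify the device.
 */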
static void
bdev_virtio_blk_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_virtio_blk_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch);
	struct virtqueue *vq = virtio_channel->vq;
	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
	int rc;

	rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2);
	if (rc == -ENOMEM) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;
	} else if (rc != 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
		virtqueue_req_add_iovs(vq, &io_ctx->iov_unmap, 1, SPDK_VIRTIO_DESC_RO);
	} else {
		virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				       bdev_io->type == SPDK_BDEV_IO_TYPE_READ ?
				       SPDK_VIRTIO_DESC_WR : SPDK_VIRTIO_DESC_RO);
	}
	virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);

	virtqueue_req_flush(vq);
}

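/* Translate a bdev I/O into a virtio-blk request. Offsets are expressed in
 * 512-byte sectors regardless of the advertised block size, as required by
 * the virtio-blk specification.
 */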
static void
bdev_virtio_command(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct virtio_blk_io_ctx *io_ctx = bdev_virtio_blk_init_io_vreq(ch, bdev_io);
	struct virtio_blk_outhdr *req = &io_ctx->req;
	struct virtio_blk_discard_write_zeroes *desc = &io_ctx->unmap;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		req->type = VIRTIO_BLK_T_IN;
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		req->type = VIRTIO_BLK_T_OUT;
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
		req->type = VIRTIO_BLK_T_DISCARD;
		desc->sector = bdev_io->u.bdev.offset_blocks *
			       spdk_bdev_get_block_size(bdev_io->bdev) / 512;
		desc->num_sectors = bdev_io->u.bdev.num_blocks *
				    spdk_bdev_get_block_size(bdev_io->bdev) / 512;
		desc->flags = 0;
	}

	req->sector = bdev_io->u.bdev.offset_blocks *
		      spdk_bdev_get_block_size(bdev_io->bdev) / 512;

	bdev_virtio_blk_send_io(ch, bdev_io);
}

static void
bdev_virtio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		       bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	bdev_virtio_command(ch, bdev_io);
}

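/* Dispatch a bdev I/O. Returns 0 if the request was handled (even if it was
 * completed with an error status) and a negative value for unsupported I/O
 * types, which the caller fails explicitly.
 */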
static int
_bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct virtio_blk_dev *bvdev = bdev_io->bdev->ctxt;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_virtio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (bvdev->readonly) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			bdev_virtio_command(ch, bdev_io);
		}
		return 0;
	case SPDK_BDEV_IO_TYPE_RESET:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		if (bvdev->unmap) {
			bdev_virtio_command(ch, bdev_io);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
		return 0;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	default:
		return -1;
	}

	SPDK_UNREACHABLE();
}

static void
bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_virtio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct virtio_blk_dev *bvdev = ctx;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;
	case SPDK_BDEV_IO_TYPE_WRITE:
		return !bvdev->readonly;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		return bvdev->unmap;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_virtio_get_io_channel(void *ctx)
{
	struct virtio_blk_dev *bvdev = ctx;

	return spdk_get_io_channel(bvdev);
}

static void
virtio_blk_dev_unregister_cb(void *io_device)
{
	struct virtio_blk_dev *bvdev = io_device;
	struct virtio_dev *vdev = &bvdev->vdev;

	virtio_dev_stop(vdev);
	virtio_dev_destruct(vdev);
	spdk_bdev_destruct_done(&bvdev->bdev, 0);
	free(bvdev);
}

static int
bdev_virtio_disk_destruct(void *ctx)
{
	struct virtio_blk_dev *bvdev = ctx;

	spdk_io_device_unregister(bvdev, virtio_blk_dev_unregister_cb);
	return 1;
}

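/* Unregister a virtio-blk bdev previously created by this module. The
 * callback is invoked once the bdev and its underlying virtio device are
 * torn down.
 */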
int
bdev_virtio_blk_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg)
{
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_get_by_name(name);
	if (bdev == NULL) {
		return -ENODEV;
	}

	if (bdev->module != &virtio_blk_if) {
		return -ENODEV;
	}

	spdk_bdev_unregister(bdev, cb_fn, cb_arg);

	return 0;
}

static int
bdev_virtio_dump_json_config(void *ctx, struct spdk_json_write_ctx *w)
{
	struct virtio_blk_dev *bvdev = ctx;

	virtio_dev_dump_json_info(&bvdev->vdev, w);
	return 0;
}

static void
bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct virtio_blk_dev *bvdev = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_virtio_attach_controller");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bvdev->vdev.name);
	spdk_json_write_named_string(w, "dev_type", "blk");

	/* Write transport specific parameters. */
	bvdev->vdev.backend_ops->write_json_config(&bvdev->vdev, w);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table virtio_fn_table = {
	.destruct		= bdev_virtio_disk_destruct,
	.submit_request		= bdev_virtio_submit_request,
	.io_type_supported	= bdev_virtio_io_type_supported,
	.get_io_channel		= bdev_virtio_get_io_channel,
	.dump_info_json		= bdev_virtio_dump_json_config,
	.write_config_json	= bdev_virtio_write_config_json,
};

static void
bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io)
{
	struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;

	spdk_bdev_io_complete(bdev_io, io_ctx->resp == VIRTIO_BLK_S_OK ?
			      SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

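/* Per-channel poller: reap completed requests from the virtqueue and
 * complete the corresponding bdev I/Os based on the virtio status byte.
 */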
static int
bdev_virtio_poll(void *arg)
{
	struct bdev_virtio_blk_io_channel *ch = arg;
	void *io[32];
	uint32_t io_len[32];
	uint16_t i, cnt;

	cnt = virtio_recv_pkts(ch->vq, io, io_len, SPDK_COUNTOF(io));
	for (i = 0; i < cnt; ++i) {
		bdev_virtio_io_cpl(io[i]);
	}

	return cnt;
}

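/* I/O channel create/destroy callbacks. Each channel claims one unused
 * virtqueue for its exclusive use and polls it for completions.
 */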
static int
bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct virtio_blk_dev *bvdev = io_device;
	struct virtio_dev *vdev = &bvdev->vdev;
	struct bdev_virtio_blk_io_channel *ch = ctx_buf;
	struct virtqueue *vq;
	int32_t queue_idx;

	queue_idx = virtio_dev_find_and_acquire_queue(vdev, 0);
	if (queue_idx < 0) {
		SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n");
		return -1;
	}

	vq = vdev->vqs[queue_idx];

	ch->vdev = vdev;
	ch->vq = vq;

	ch->poller = SPDK_POLLER_REGISTER(bdev_virtio_poll, ch, 0);
	return 0;
}

static void
bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf)
{
	struct virtio_blk_dev *bvdev = io_device;
	struct virtio_dev *vdev = &bvdev->vdev;
	struct bdev_virtio_blk_io_channel *ch = ctx_buf;
	struct virtqueue *vq = ch->vq;

	spdk_poller_unregister(&ch->poller);
	virtio_dev_release_queue(vdev, vq->vq_queue_index);
}

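/* Read the virtio-blk device config (block size, capacity, queue count,
 * segment limits, RO/DISCARD flags), start the device with up to max_queues
 * request queues and register the resulting bdev.
 */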
static int
virtio_blk_dev_init(struct virtio_blk_dev *bvdev, uint16_t max_queues)
{
	struct virtio_dev *vdev = &bvdev->vdev;
	struct spdk_bdev *bdev = &bvdev->bdev;
	uint64_t capacity, num_blocks;
	uint32_t block_size, size_max, seg_max;
	uint16_t host_max_queues;
	int rc;

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, blk_size),
						&block_size, sizeof(block_size));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (block_size == 0 || block_size % 512 != 0) {
			SPDK_ERRLOG("%s: invalid block size (%"PRIu32"). Must be "
				    "a multiple of 512.\n", vdev->name, block_size);
			return -EIO;
		}
	} else {
		block_size = 512;
	}

	rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, capacity),
					&capacity, sizeof(capacity));
	if (rc) {
		SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
		return rc;
	}

	/* `capacity` is a number of 512-byte sectors. */
	num_blocks = capacity * 512 / block_size;
	if (num_blocks == 0) {
		SPDK_ERRLOG("%s: size too small (size: %"PRIu64", blocksize: %"PRIu32").\n",
			    vdev->name, capacity * 512, block_size);
		return -EIO;
	}

	if ((capacity * 512) % block_size != 0) {
		SPDK_WARNLOG("%s: size has been rounded down to the nearest block size boundary. "
			     "(block size: %"PRIu32", previous size: %"PRIu64", new size: %"PRIu64")\n",
			     vdev->name, block_size, capacity * 512, num_blocks * block_size);
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
						&host_max_queues, sizeof(host_max_queues));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}
	} else {
		host_max_queues = 1;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_SIZE_MAX)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, size_max),
						&size_max, sizeof(size_max));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (spdk_unlikely(size_max < block_size)) {
			SPDK_WARNLOG("%s: minimum segment size is set to block size %u forcefully.\n",
				     vdev->name, block_size);
			size_max = block_size;
		}

		bdev->max_segment_size = size_max;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_SEG_MAX)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, seg_max),
						&seg_max, sizeof(seg_max));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			return rc;
		}

		if (spdk_unlikely(seg_max == 0)) {
			SPDK_ERRLOG("%s: virtio blk SEG_MAX can't be 0\n", vdev->name);
			return -EINVAL;
		}

		bdev->max_num_segments = seg_max;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_RO)) {
		bvdev->readonly = true;
	}

	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		bvdev->unmap = true;
	}

	if (max_queues == 0) {
		SPDK_ERRLOG("%s: requested 0 request queues (%"PRIu16" available).\n",
			    vdev->name, host_max_queues);
		return -EINVAL;
	}

	if (max_queues > host_max_queues) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues "
			     "but only %"PRIu16" available.\n",
			     vdev->name, max_queues, host_max_queues);
		max_queues = host_max_queues;
	}

	/* The bdev is tied to the virtio device; we can reuse its name. */
	bdev->name = vdev->name;
	rc = virtio_dev_start(vdev, max_queues, 0);
	if (rc != 0) {
		return rc;
	}

	bdev->product_name = "VirtioBlk Disk";
	bdev->write_cache = 0;
	bdev->blocklen = block_size;
	bdev->blockcnt = num_blocks;

	bdev->ctxt = bvdev;
	bdev->fn_table = &virtio_fn_table;
	bdev->module = &virtio_blk_if;

	spdk_io_device_register(bvdev, bdev_virtio_blk_ch_create_cb,
				bdev_virtio_blk_ch_destroy_cb,
				sizeof(struct bdev_virtio_blk_io_channel),
				vdev->name);

	rc = spdk_bdev_register(bdev);
	if (rc) {
		SPDK_ERRLOG("Failed to register bdev name=%s\n", bdev->name);
		spdk_io_device_unregister(bvdev, NULL);
		virtio_dev_stop(vdev);
		return rc;
	}

	return 0;
}

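/* Create a virtio-blk bdev on top of a virtio-pci device. If no name is
 * given, one is generated as VirtioBlk<N>.
 */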
static struct virtio_blk_dev *
virtio_pci_blk_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx)
{
	static int pci_dev_counter = 0;
	struct virtio_blk_dev *bvdev;
	struct virtio_dev *vdev;
	char *default_name = NULL;
	uint16_t num_queues;
	int rc;

	bvdev = calloc(1, sizeof(*bvdev));
	if (bvdev == NULL) {
		SPDK_ERRLOG("virtio device calloc failed\n");
		return NULL;
	}
	vdev = &bvdev->vdev;

	if (name == NULL) {
		default_name = spdk_sprintf_alloc("VirtioBlk%"PRIu32, pci_dev_counter++);
		if (default_name == NULL) {
			free(bvdev);
			return NULL;
		}
		name = default_name;
	}

	rc = virtio_pci_dev_init(vdev, name, pci_ctx);
	free(default_name);

	if (rc != 0) {
		free(bvdev);
		return NULL;
	}

	rc = virtio_dev_reset(vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
	if (rc != 0) {
		goto fail;
	}

	/* TODO: add a way to limit usable virtqueues */
	if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
		rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
						&num_queues, sizeof(num_queues));
		if (rc) {
			SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
			goto fail;
		}
	} else {
		num_queues = 1;
	}

	rc = virtio_blk_dev_init(bvdev, num_queues);
	if (rc != 0) {
		goto fail;
	}

	return bvdev;

fail:
	vdev->ctx = NULL;
	virtio_dev_destruct(vdev);
	free(bvdev);
	return NULL;
}

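/* Create a virtio-blk bdev on top of a vhost-user socket at the given path. */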
static struct virtio_blk_dev *
virtio_user_blk_dev_create(const char *name, const char *path,
			   uint16_t num_queues, uint32_t queue_size)
{
	struct virtio_blk_dev *bvdev;
	int rc;

	bvdev = calloc(1, sizeof(*bvdev));
	if (bvdev == NULL) {
		SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path);
		return NULL;
	}

	rc = virtio_user_dev_init(&bvdev->vdev, name, path, queue_size);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to create virtio device %s: %s\n", name, path);
		free(bvdev);
		return NULL;
	}

	rc = virtio_dev_reset(&bvdev->vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
	if (rc != 0) {
		virtio_dev_destruct(&bvdev->vdev);
		free(bvdev);
		return NULL;
	}

	rc = virtio_blk_dev_init(bvdev, num_queues);
	if (rc != 0) {
		virtio_dev_destruct(&bvdev->vdev);
		free(bvdev);
		return NULL;
	}

	return bvdev;
}

struct bdev_virtio_pci_dev_create_ctx {
	const char *name;
	struct virtio_blk_dev *ret;
};

static int
bdev_virtio_pci_blk_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
{
	struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx;

	create_ctx->ret = virtio_pci_blk_dev_create(create_ctx->name, pci_ctx);
	if (create_ctx->ret == NULL) {
		return -1;
	}

	return 0;
}

struct spdk_bdev *
bdev_virtio_pci_blk_dev_create(const char *name, struct spdk_pci_addr *pci_addr)
{
	struct bdev_virtio_pci_dev_create_ctx create_ctx;

	create_ctx.name = name;
	create_ctx.ret = NULL;

	virtio_pci_dev_attach(bdev_virtio_pci_blk_dev_create_cb, &create_ctx,
			      VIRTIO_ID_BLOCK, pci_addr);

	if (create_ctx.ret == NULL) {
		return NULL;
	}

	return &create_ctx.ret->bdev;
}

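/* Hotplug poller: remove bdevs whose PCI devices reported a removal event
 * and enumerate newly attached virtio-blk PCI devices.
 */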
static int
bdev_virtio_pci_blk_monitor(void *arg)
{
	const char *vdev_name;
	struct bdev_virtio_pci_dev_create_ctx create_ctx;

	while ((vdev_name = virtio_pci_dev_event_process(g_blk_hotplug_fd, VIRTIO_ID_BLOCK)) != NULL) {
		bdev_virtio_blk_dev_remove(vdev_name, NULL, NULL);
	}

	/* Enumerate virtio-blk PCI devices. */
	memset(&create_ctx, 0, sizeof(create_ctx));
	virtio_pci_dev_enumerate(bdev_virtio_pci_blk_dev_create_cb, &create_ctx,
				 VIRTIO_ID_BLOCK);

	return SPDK_POLLER_BUSY;
}

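/* Enable or disable PCI hotplug monitoring. The poll period is clamped to
 * VIRTIO_BLK_HOTPLUG_POLL_PERIOD_MAX and defaults to
 * VIRTIO_BLK_HOTPLUG_POLL_PERIOD_DEFAULT when 0 is passed.
 */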
int
bdev_virtio_pci_blk_set_hotplug(bool enabled, uint64_t period_us)
{
	if (enabled == true && !spdk_process_is_primary()) {
		return -EPERM;
	}

	if (g_blk_hotplug_poller) {
		close(g_blk_hotplug_fd);
		spdk_poller_unregister(&g_blk_hotplug_poller);
	}

	if (!enabled) {
		return 0;
	}

	g_blk_hotplug_fd = spdk_pci_event_listen();
	if (g_blk_hotplug_fd < 0) {
		return g_blk_hotplug_fd;
	}

	period_us = period_us ? period_us : VIRTIO_BLK_HOTPLUG_POLL_PERIOD_DEFAULT;
	period_us = spdk_min(period_us, VIRTIO_BLK_HOTPLUG_POLL_PERIOD_MAX);
	g_blk_hotplug_poller = spdk_poller_register(bdev_virtio_pci_blk_monitor, NULL, period_us);
	if (!g_blk_hotplug_poller) {
		close(g_blk_hotplug_fd);
		return -1;
	}

	return 0;
}

static int
bdev_virtio_initialize(void)
{
	return 0;
}

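/* Public API (declared in bdev_virtio.h): attach a vhost-user virtio-blk
 * device and expose it as a bdev.
 */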
struct spdk_bdev *
bdev_virtio_user_blk_dev_create(const char *name, const char *path,
				unsigned num_queues, unsigned queue_size)
{
	struct virtio_blk_dev *bvdev;

	bvdev = virtio_user_blk_dev_create(name, path, num_queues, queue_size);
	if (bvdev == NULL) {
		return NULL;
	}

	return &bvdev->bdev;
}

static int
bdev_virtio_blk_get_ctx_size(void)
{
	return sizeof(struct virtio_blk_io_ctx);
}

SPDK_LOG_REGISTER_COMPONENT(virtio_blk)