xref: /dpdk/lib/vhost/vduse.c (revision 21a66096bb44a4468353782c36fc85913520dc6c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2023 Red Hat, Inc.
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <unistd.h>
8 #include <fcntl.h>
9 
10 
11 #include <uapi/linux/vduse.h>
12 #include <linux/virtio_net.h>
13 
14 #include <sys/ioctl.h>
15 #include <sys/mman.h>
16 #include <sys/stat.h>
17 
18 #include <rte_common.h>
19 #include <rte_thread.h>
20 
21 #include "fd_man.h"
22 #include "iotlb.h"
23 #include "vduse.h"
24 #include "vhost.h"
25 #include "virtio_net_ctrl.h"
26 
27 #define VHOST_VDUSE_API_VERSION 0
28 #define VDUSE_CTRL_PATH "/dev/vduse/control"
29 
/* Backend-wide VDUSE state shared by all VDUSE devices handled in this file. */
struct vduse {
	/* fd set on which device, control queue and reconnect handlers are registered. */
	struct fdset *fdset;
};

/* Single instance; its fdset is created on first use in vduse_device_create(). */
static struct vduse vduse;
35 
/*
 * Human-readable names of the VDUSE request types. The table is indexed
 * with the request type by vduse_req_id_to_str() and is used for logging.
 */
static const char * const vduse_reqs_str[] = {
	"VDUSE_GET_VQ_STATE",
	"VDUSE_SET_STATUS",
	"VDUSE_UPDATE_IOTLB",
};

/*
 * Map a request type to its name, or to "Unknown" when the value lies
 * outside the table. The argument is parenthesized so that an arbitrary
 * expression can be passed; note that it is still evaluated twice, so it
 * must not have side effects.
 */
#define vduse_req_id_to_str(id) \
	((id) < RTE_DIM(vduse_reqs_str) ? \
	vduse_reqs_str[(id)] : "Unknown")
45 
46 static int
47 vduse_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
48 {
49 	return ioctl(dev->vduse_dev_fd, VDUSE_VQ_INJECT_IRQ, &vq->index);
50 }
51 
/*
 * Backend .iotlb_remove_notify callback: unmap the memory backing an IOTLB
 * entry. The unmapped length is offset + size, i.e. the same length that
 * vduse_iotlb_miss() passes to mmap().
 */
static void
vduse_iotlb_remove_notify(uint64_t addr, uint64_t offset, uint64_t size)
{
	void *base = (void *)(uintptr_t)addr;
	uint64_t map_len = offset + size;

	munmap(base, map_len);
}
57 
58 static int
59 vduse_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm __rte_unused)
60 {
61 	struct vduse_iotlb_entry entry;
62 	uint64_t size, page_size;
63 	struct stat stat;
64 	void *mmap_addr;
65 	int fd, ret;
66 
67 	entry.start = iova;
68 	entry.last = iova + 1;
69 
70 	ret = ioctl(dev->vduse_dev_fd, VDUSE_IOTLB_GET_FD, &entry);
71 	if (ret < 0) {
72 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get IOTLB entry for 0x%" PRIx64,
73 				iova);
74 		return -1;
75 	}
76 
77 	fd = ret;
78 
79 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "New IOTLB entry:");
80 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tIOVA: %" PRIx64 " - %" PRIx64,
81 			(uint64_t)entry.start, (uint64_t)entry.last);
82 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\toffset: %" PRIx64, (uint64_t)entry.offset);
83 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tfd: %d", fd);
84 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tperm: %x", entry.perm);
85 
86 	size = entry.last - entry.start + 1;
87 	mmap_addr = mmap(0, size + entry.offset, entry.perm, MAP_SHARED, fd, 0);
88 	if (!mmap_addr) {
89 		VHOST_CONFIG_LOG(dev->ifname, ERR,
90 				"Failed to mmap IOTLB entry for 0x%" PRIx64, iova);
91 		ret = -1;
92 		goto close_fd;
93 	}
94 
95 	ret = fstat(fd, &stat);
96 	if (ret < 0) {
97 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get page size.");
98 		munmap(mmap_addr, entry.offset + size);
99 		goto close_fd;
100 	}
101 	page_size = (uint64_t)stat.st_blksize;
102 
103 	vhost_user_iotlb_cache_insert(dev, entry.start, (uint64_t)(uintptr_t)mmap_addr,
104 		entry.offset, size, page_size, entry.perm);
105 
106 	ret = 0;
107 close_fd:
108 	close(fd);
109 
110 	return ret;
111 }
112 
/* Backend callbacks passed to vhost_new_device() for VDUSE devices. */
static struct vhost_backend_ops vduse_backend_ops = {
	.iotlb_miss = vduse_iotlb_miss,
	.iotlb_remove_notify = vduse_iotlb_remove_notify,
	.inject_irq = vduse_inject_irq,
};
118 
119 static void
120 vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
121 {
122 	struct virtio_net *dev = arg;
123 	uint64_t buf;
124 	int ret;
125 
126 	ret = read(fd, &buf, sizeof(buf));
127 	if (ret < 0) {
128 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read control queue event: %s",
129 				strerror(errno));
130 		return;
131 	}
132 
133 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "Control queue kicked");
134 	if (virtio_net_ctrl_handle(dev))
135 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to handle ctrl request");
136 }
137 
138 static void
139 vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
140 {
141 	struct vhost_virtqueue *vq = dev->virtqueue[index];
142 	struct vhost_vring_addr *ra = &vq->ring_addrs;
143 	struct vduse_vq_info vq_info;
144 	struct vduse_vq_eventfd vq_efd;
145 	int ret;
146 
147 	vq_info.index = index;
148 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_GET_INFO, &vq_info);
149 	if (ret) {
150 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get VQ %u info: %s",
151 				index, strerror(errno));
152 		return;
153 	}
154 
155 	if (reconnect) {
156 		vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
157 		vq->last_used_idx = vq->reconnect_log->last_avail_idx;
158 	} else {
159 		vq->last_avail_idx = vq_info.split.avail_index;
160 		vq->last_used_idx = vq_info.split.avail_index;
161 	}
162 	vq->size = vq_info.num;
163 	vq->ready = true;
164 	vq->enabled = vq_info.ready;
165 	ra->desc_user_addr = vq_info.desc_addr;
166 	ra->avail_user_addr = vq_info.driver_addr;
167 	ra->used_user_addr = vq_info.device_addr;
168 	VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
169 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
170 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
171 			(unsigned long long)vq_info.desc_addr);
172 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdriver_addr: %llx",
173 			(unsigned long long)vq_info.driver_addr);
174 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdevice_addr: %llx",
175 			(unsigned long long)vq_info.device_addr);
176 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq->last_avail_idx);
177 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tused_idx: %u", vq->last_used_idx);
178 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
179 	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
180 	if (vq->kickfd < 0) {
181 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
182 				index, strerror(errno));
183 		vq->kickfd = VIRTIO_INVALID_EVENTFD;
184 		return;
185 	}
186 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tkick fd: %d", vq->kickfd);
187 
188 	vq->shadow_used_split = rte_malloc_socket(NULL,
189 				vq->size * sizeof(struct vring_used_elem),
190 				RTE_CACHE_LINE_SIZE, 0);
191 	vq->batch_copy_elems = rte_malloc_socket(NULL,
192 				vq->size * sizeof(struct batch_copy_elem),
193 				RTE_CACHE_LINE_SIZE, 0);
194 
195 	rte_rwlock_write_lock(&vq->access_lock);
196 	vhost_user_iotlb_rd_lock(vq);
197 	if (vring_translate(dev, vq))
198 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to translate vring %d addresses",
199 				index);
200 
201 	if (vhost_enable_guest_notification(dev, vq, 0))
202 		VHOST_CONFIG_LOG(dev->ifname, ERR,
203 				"Failed to disable guest notifications on vring %d",
204 				index);
205 	vhost_user_iotlb_rd_unlock(vq);
206 	rte_rwlock_write_unlock(&vq->access_lock);
207 
208 	vq_efd.index = index;
209 	vq_efd.fd = vq->kickfd;
210 
211 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
212 	if (ret) {
213 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to setup kickfd for VQ %u: %s",
214 				index, strerror(errno));
215 		close(vq->kickfd);
216 		vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
217 		return;
218 	}
219 
220 	if (vq == dev->cvq) {
221 		ret = fdset_add(vduse.fdset, vq->kickfd, vduse_control_queue_event, NULL, dev);
222 		if (ret) {
223 			VHOST_CONFIG_LOG(dev->ifname, ERR,
224 					"Failed to setup kickfd handler for VQ %u: %s",
225 					index, strerror(errno));
226 			vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;
227 			ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
228 			close(vq->kickfd);
229 			vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
230 		}
231 		vhost_enable_guest_notification(dev, vq, 1);
232 		VHOST_CONFIG_LOG(dev->ifname, INFO, "Ctrl queue event handler installed");
233 	}
234 }
235 
236 static void
237 vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
238 {
239 	struct vhost_virtqueue *vq = dev->virtqueue[index];
240 	struct vduse_vq_eventfd vq_efd;
241 	int ret;
242 
243 	if (vq == dev->cvq && vq->kickfd >= 0)
244 		fdset_del(vduse.fdset, vq->kickfd);
245 
246 	vq_efd.index = index;
247 	vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;
248 
249 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
250 	if (ret)
251 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to cleanup kickfd for VQ %u: %s",
252 				index, strerror(errno));
253 
254 	close(vq->kickfd);
255 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
256 
257 	rte_rwlock_write_lock(&vq->access_lock);
258 	vring_invalidate(dev, vq);
259 	rte_rwlock_write_unlock(&vq->access_lock);
260 
261 	rte_free(vq->batch_copy_elems);
262 	vq->batch_copy_elems = NULL;
263 
264 	rte_free(vq->shadow_used_split);
265 	vq->shadow_used_split = NULL;
266 
267 	vq->enabled = false;
268 	vq->ready = false;
269 	vq->size = 0;
270 	vq->last_used_idx = 0;
271 	vq->last_avail_idx = 0;
272 }
273 
274 static void
275 vduse_device_start(struct virtio_net *dev, bool reconnect)
276 {
277 	unsigned int i, ret;
278 
279 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Starting device...");
280 
281 	dev->notify_ops = vhost_driver_callback_get(dev->ifname);
282 	if (!dev->notify_ops) {
283 		VHOST_CONFIG_LOG(dev->ifname, ERR,
284 				"Failed to get callback ops for driver");
285 		return;
286 	}
287 
288 	ret = ioctl(dev->vduse_dev_fd, VDUSE_DEV_GET_FEATURES, &dev->features);
289 	if (ret) {
290 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get features: %s",
291 				strerror(errno));
292 		return;
293 	}
294 
295 	if (reconnect && dev->features != dev->reconnect_log->features) {
296 		VHOST_CONFIG_LOG(dev->ifname, ERR,
297 				"Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,
298 				dev->reconnect_log->features, dev->features);
299 		return;
300 	}
301 
302 	dev->reconnect_log->features = dev->features;
303 
304 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
305 		dev->features);
306 
307 	if (dev->features &
308 		((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
309 		 (1ULL << VIRTIO_F_VERSION_1) |
310 		 (1ULL << VIRTIO_F_RING_PACKED))) {
311 		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
312 	} else {
313 		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
314 	}
315 
316 	for (i = 0; i < dev->nr_vring; i++)
317 		vduse_vring_setup(dev, i, reconnect);
318 
319 	dev->flags |= VIRTIO_DEV_READY;
320 
321 	if (dev->notify_ops->new_device(dev->vid) == 0)
322 		dev->flags |= VIRTIO_DEV_RUNNING;
323 
324 	for (i = 0; i < dev->nr_vring; i++) {
325 		struct vhost_virtqueue *vq = dev->virtqueue[i];
326 
327 		if (vq == dev->cvq)
328 			continue;
329 
330 		if (dev->notify_ops->vring_state_changed)
331 			dev->notify_ops->vring_state_changed(dev->vid, i, vq->enabled);
332 	}
333 }
334 
335 static void
336 vduse_device_stop(struct virtio_net *dev)
337 {
338 	unsigned int i;
339 
340 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Stopping device...");
341 
342 	vhost_destroy_device_notify(dev);
343 
344 	dev->flags &= ~VIRTIO_DEV_READY;
345 
346 	for (i = 0; i < dev->nr_vring; i++)
347 		vduse_vring_cleanup(dev, i);
348 
349 	vhost_user_iotlb_flush_all(dev);
350 }
351 
352 static void
353 vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
354 {
355 	struct virtio_net *dev = arg;
356 	struct vduse_dev_request req;
357 	struct vduse_dev_response resp;
358 	struct vhost_virtqueue *vq;
359 	uint8_t old_status = dev->status;
360 	int ret;
361 
362 	memset(&resp, 0, sizeof(resp));
363 
364 	ret = read(fd, &req, sizeof(req));
365 	if (ret < 0) {
366 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read request: %s",
367 				strerror(errno));
368 		return;
369 	} else if (ret < (int)sizeof(req)) {
370 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Incomplete to read request %d", ret);
371 		return;
372 	}
373 
374 	VHOST_CONFIG_LOG(dev->ifname, INFO, "New request: %s (%u)",
375 			vduse_req_id_to_str(req.type), req.type);
376 
377 	switch (req.type) {
378 	case VDUSE_GET_VQ_STATE:
379 		vq = dev->virtqueue[req.vq_state.index];
380 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tvq index: %u, avail_index: %u",
381 				req.vq_state.index, vq->last_avail_idx);
382 		resp.vq_state.split.avail_index = vq->last_avail_idx;
383 		resp.result = VDUSE_REQ_RESULT_OK;
384 		break;
385 	case VDUSE_SET_STATUS:
386 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnew status: 0x%08x",
387 				req.s.status);
388 		old_status = dev->status;
389 		dev->status = req.s.status;
390 		dev->reconnect_log->status = dev->status;
391 		resp.result = VDUSE_REQ_RESULT_OK;
392 		break;
393 	case VDUSE_UPDATE_IOTLB:
394 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tIOVA range: %" PRIx64 " - %" PRIx64,
395 				(uint64_t)req.iova.start, (uint64_t)req.iova.last);
396 		vhost_user_iotlb_cache_remove(dev, req.iova.start,
397 				req.iova.last - req.iova.start + 1);
398 		resp.result = VDUSE_REQ_RESULT_OK;
399 		break;
400 	default:
401 		resp.result = VDUSE_REQ_RESULT_FAILED;
402 		break;
403 	}
404 
405 	resp.request_id = req.request_id;
406 
407 	ret = write(dev->vduse_dev_fd, &resp, sizeof(resp));
408 	if (ret != sizeof(resp)) {
409 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to write response %s",
410 				strerror(errno));
411 		return;
412 	}
413 
414 	if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
415 		if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
416 			vduse_device_start(dev, false);
417 		else
418 			vduse_device_stop(dev);
419 	}
420 
421 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Request %s (%u) handled successfully",
422 			vduse_req_id_to_str(req.type), req.type);
423 }
424 
425 static char vduse_reconnect_dir[PATH_MAX];
426 static bool vduse_reconnect_path_set;
427 
428 static int
429 vduse_reconnect_path_init(void)
430 {
431 	const char *directory;
432 	int ret;
433 
434 	if (vduse_reconnect_path_set == true)
435 		return 0;
436 
437 	/* from RuntimeDirectory= see systemd.exec */
438 	directory = getenv("RUNTIME_DIRECTORY");
439 	if (directory == NULL) {
440 		/*
441 		 * Used standard convention defined in
442 		 * XDG Base Directory Specification and
443 		 * Filesystem Hierarchy Standard.
444 		 */
445 		if (getuid() == 0)
446 			directory = "/var/run";
447 		else
448 			directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
449 	}
450 
451 	ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/vduse",
452 			directory);
453 	if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
454 		VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
455 		return -1;
456 	}
457 
458 	ret = mkdir(vduse_reconnect_dir, 0700);
459 	if (ret < 0 && errno != EEXIST) {
460 		VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
461 				vduse_reconnect_dir, strerror(errno));
462 		return -1;
463 	}
464 
465 	VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
466 			vduse_reconnect_dir);
467 
468 	vduse_reconnect_path_set = true;
469 
470 	return 0;
471 }
472 
473 static int
474 vduse_reconnect_log_map(struct virtio_net *dev, bool create)
475 {
476 	char reco_file[PATH_MAX];
477 	int fd, ret;
478 	const char *name = dev->ifname + strlen("/dev/vduse/");
479 
480 	if (vduse_reconnect_path_init() < 0) {
481 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to initialize reconnect path");
482 		return -1;
483 	}
484 
485 	ret = snprintf(reco_file, sizeof(reco_file), "%s/%s", vduse_reconnect_dir, name);
486 	if (ret < 0 || ret == sizeof(reco_file)) {
487 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to create vduse reconnect path name");
488 		return -1;
489 	}
490 
491 	if (create) {
492 		fd = open(reco_file, O_CREAT | O_EXCL | O_RDWR, 0600);
493 		if (fd < 0) {
494 			if (errno == EEXIST) {
495 				VHOST_CONFIG_LOG(dev->ifname, ERR, "Reconnect file %s exists but not the device",
496 						reco_file);
497 			} else {
498 				VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to open reconnect file %s (%s)",
499 						reco_file, strerror(errno));
500 			}
501 			return -1;
502 		}
503 
504 		ret = ftruncate(fd, sizeof(*dev->reconnect_log));
505 		if (ret < 0) {
506 			VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to truncate reconnect file %s (%s)",
507 					reco_file, strerror(errno));
508 			goto out_close;
509 		}
510 	} else {
511 		fd = open(reco_file, O_RDWR, 0600);
512 		if (fd < 0) {
513 			if (errno == ENOENT)
514 				VHOST_CONFIG_LOG(dev->ifname, ERR, "Missing reconnect file (%s)", reco_file);
515 			else
516 				VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to open reconnect file %s (%s)",
517 						reco_file, strerror(errno));
518 			return -1;
519 		}
520 	}
521 
522 	dev->reconnect_log = mmap(NULL, sizeof(*dev->reconnect_log), PROT_READ | PROT_WRITE,
523 				MAP_SHARED, fd, 0);
524 	if (dev->reconnect_log == MAP_FAILED) {
525 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to mmap reconnect file %s (%s)",
526 				reco_file, strerror(errno));
527 		ret = -1;
528 		goto out_close;
529 	}
530 	ret = 0;
531 
532 out_close:
533 	close(fd);
534 
535 	return ret;
536 }
537 
538 static int
539 vduse_reconnect_log_check(struct virtio_net *dev, uint64_t features, uint32_t total_queues)
540 {
541 	if (dev->reconnect_log->version != VHOST_RECONNECT_VERSION) {
542 		VHOST_CONFIG_LOG(dev->ifname, ERR,
543 				"Version mismatch between backend (0x%x) & reconnection file (0x%x)",
544 				VHOST_RECONNECT_VERSION, dev->reconnect_log->version);
545 		return -1;
546 	}
547 
548 	if ((dev->reconnect_log->features & features) != dev->reconnect_log->features) {
549 		VHOST_CONFIG_LOG(dev->ifname, ERR,
550 				"Features mismatch between backend (0x%" PRIx64 ") & reconnection file (0x%" PRIx64 ")",
551 				features, dev->reconnect_log->features);
552 		return -1;
553 	}
554 
555 	if (dev->reconnect_log->nr_vrings != total_queues) {
556 		VHOST_CONFIG_LOG(dev->ifname, ERR,
557 				"Queues number mismatch between backend (%u) and reconnection file (%u)",
558 				total_queues, dev->reconnect_log->nr_vrings);
559 		return -1;
560 	}
561 
562 	return 0;
563 }
564 
/*
 * One-shot fdset callback used on reconnection: start the device passed as
 * @arg in reconnect mode, then close @fd and ask the fdset to drop it.
 */
static void
vduse_reconnect_handler(int fd, void *arg, int *remove)
{
	vduse_device_start((struct virtio_net *)arg, true);

	/* This eventfd has served its purpose. */
	*remove = 1;
	close(fd);
}
575 
576 static int
577 vduse_reconnect_start_device(struct virtio_net *dev)
578 {
579 	int fd, ret;
580 
581 	/*
582 	 * Make vduse_device_start() being executed in the same
583 	 * context for both reconnection and fresh startup.
584 	 */
585 	fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
586 	if (fd < 0) {
587 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to create reconnect efd: %s",
588 				strerror(errno));
589 		ret = -1;
590 		goto out_err;
591 	}
592 
593 	ret = fdset_add(vduse.fdset, fd, vduse_reconnect_handler, NULL, dev);
594 	if (ret) {
595 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to add reconnect efd %d to vduse fdset",
596 				fd);
597 		goto out_err_close;
598 	}
599 
600 	ret = eventfd_write(fd, (eventfd_t)1);
601 	if (ret < 0) {
602 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to write to reconnect eventfd");
603 		goto out_err_fdset;
604 	}
605 
606 	return 0;
607 
608 out_err_fdset:
609 	fdset_del(vduse.fdset, fd);
610 out_err_close:
611 	close(fd);
612 out_err:
613 	return ret;
614 }
615 
616 int
617 vduse_device_create(const char *path, bool compliant_ol_flags)
618 {
619 	int control_fd, dev_fd, vid, ret;
620 	uint32_t i, max_queue_pairs, total_queues;
621 	struct virtio_net *dev;
622 	struct virtio_net_config vnet_config = {{ 0 }};
623 	uint64_t ver = VHOST_VDUSE_API_VERSION;
624 	uint64_t features;
625 	const char *name = path + strlen("/dev/vduse/");
626 	bool reconnect = false;
627 
628 	if (vduse.fdset == NULL) {
629 		vduse.fdset = fdset_init("vduse-evt");
630 		if (vduse.fdset == NULL) {
631 			VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE fdset");
632 			return -1;
633 		}
634 	}
635 
636 	control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
637 	if (control_fd < 0) {
638 		VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
639 				VDUSE_CTRL_PATH, strerror(errno));
640 		return -1;
641 	}
642 
643 	if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
644 		VHOST_CONFIG_LOG(name, ERR, "Failed to set API version: %" PRIu64 ": %s",
645 				ver, strerror(errno));
646 		ret = -1;
647 		goto out_ctrl_close;
648 	}
649 
650 	ret = rte_vhost_driver_get_features(path, &features);
651 	if (ret < 0) {
652 		VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
653 		goto out_ctrl_close;
654 	}
655 
656 	ret = rte_vhost_driver_get_queue_num(path, &max_queue_pairs);
657 	if (ret < 0) {
658 		VHOST_CONFIG_LOG(name, ERR, "Failed to get max queue pairs");
659 		goto out_ctrl_close;
660 	}
661 
662 	VHOST_CONFIG_LOG(path, INFO, "VDUSE max queue pairs: %u", max_queue_pairs);
663 	total_queues = max_queue_pairs * 2;
664 
665 	if (max_queue_pairs == 1)
666 		features &= ~(RTE_BIT64(VIRTIO_NET_F_CTRL_VQ) | RTE_BIT64(VIRTIO_NET_F_MQ));
667 	else
668 		total_queues += 1; /* Includes ctrl queue */
669 
670 	dev_fd = open(path, O_RDWR);
671 	if (dev_fd >= 0) {
672 		VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
673 		reconnect = true;
674 	} else if (errno == ENOENT) {
675 		struct vduse_dev_config *dev_config;
676 
677 		dev_config = malloc(offsetof(struct vduse_dev_config, config) +
678 				sizeof(vnet_config));
679 		if (!dev_config) {
680 			VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
681 			ret = -1;
682 			goto out_ctrl_close;
683 		}
684 
685 		vnet_config.max_virtqueue_pairs = max_queue_pairs;
686 		memset(dev_config, 0, sizeof(struct vduse_dev_config));
687 
688 		rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
689 		dev_config->device_id = VIRTIO_ID_NET;
690 		dev_config->vendor_id = 0;
691 		dev_config->features = features;
692 		dev_config->vq_num = total_queues;
693 		dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
694 		dev_config->config_size = sizeof(struct virtio_net_config);
695 		memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
696 
697 		ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
698 		free(dev_config);
699 		dev_config = NULL;
700 		if (ret < 0) {
701 			VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
702 					strerror(errno));
703 			goto out_ctrl_close;
704 		}
705 
706 		dev_fd = open(path, O_RDWR);
707 		if (dev_fd < 0) {
708 			VHOST_CONFIG_LOG(name, ERR, "Failed to open newly created device %s: %s",
709 					path, strerror(errno));
710 			ret = -1;
711 			goto out_ctrl_close;
712 		}
713 	} else {
714 		VHOST_CONFIG_LOG(name, ERR, "Failed to open device %s: %s",
715 				path, strerror(errno));
716 		ret = -1;
717 		goto out_ctrl_close;
718 	}
719 
720 	ret = fcntl(dev_fd, F_SETFL, O_NONBLOCK);
721 	if (ret < 0) {
722 		VHOST_CONFIG_LOG(name, ERR, "Failed to set chardev as non-blocking: %s",
723 				strerror(errno));
724 		goto out_dev_close;
725 	}
726 
727 	vid = vhost_new_device(&vduse_backend_ops);
728 	if (vid < 0) {
729 		VHOST_CONFIG_LOG(name, ERR, "Failed to create new Vhost device");
730 		ret = -1;
731 		goto out_dev_close;
732 	}
733 
734 	dev = get_device(vid);
735 	if (!dev) {
736 		ret = -1;
737 		goto out_dev_destroy;
738 	}
739 
740 	strncpy(dev->ifname, path, IF_NAME_SZ - 1);
741 	dev->vduse_ctrl_fd = control_fd;
742 	dev->vduse_dev_fd = dev_fd;
743 
744 	ret = vduse_reconnect_log_map(dev, !reconnect);
745 	if (ret < 0)
746 		goto out_dev_destroy;
747 
748 	if (reconnect) {
749 		ret = vduse_reconnect_log_check(dev, features, total_queues);
750 		if (ret < 0)
751 			goto out_log_unmap;
752 
753 		dev->status = dev->reconnect_log->status;
754 	} else {
755 		dev->reconnect_log->version = VHOST_RECONNECT_VERSION;
756 		dev->reconnect_log->nr_vrings = total_queues;
757 		memcpy(&dev->reconnect_log->config, &vnet_config, sizeof(vnet_config));
758 	}
759 
760 	vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
761 
762 	for (i = 0; i < total_queues; i++) {
763 		struct vduse_vq_config vq_cfg = { 0 };
764 		struct vhost_virtqueue *vq;
765 
766 		ret = alloc_vring_queue(dev, i);
767 		if (ret) {
768 			VHOST_CONFIG_LOG(name, ERR, "Failed to alloc vring %d metadata", i);
769 			goto out_log_unmap;
770 		}
771 
772 		vq = dev->virtqueue[i];
773 		vq->reconnect_log = &dev->reconnect_log->vring[i];
774 
775 		if (reconnect)
776 			continue;
777 
778 		vq_cfg.index = i;
779 		vq_cfg.max_size = 1024;
780 
781 		ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
782 		if (ret) {
783 			VHOST_CONFIG_LOG(name, ERR, "Failed to set-up VQ %d", i);
784 			goto out_log_unmap;
785 		}
786 	}
787 
788 	dev->cvq = dev->virtqueue[max_queue_pairs * 2];
789 
790 	ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler, NULL, dev);
791 	if (ret) {
792 		VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
793 				dev->vduse_dev_fd);
794 		goto out_log_unmap;
795 	}
796 
797 	if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
798 		ret = vduse_reconnect_start_device(dev);
799 		if (ret)
800 			goto out_log_unmap;
801 	}
802 
803 	return 0;
804 
805 out_log_unmap:
806 	munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
807 out_dev_destroy:
808 	vhost_destroy_device(vid);
809 out_dev_close:
810 	if (dev_fd >= 0)
811 		close(dev_fd);
812 	ioctl(control_fd, VDUSE_DESTROY_DEV, name);
813 out_ctrl_close:
814 	close(control_fd);
815 
816 	return ret;
817 }
818 
819 int
820 vduse_device_destroy(const char *path)
821 {
822 	const char *name = path + strlen("/dev/vduse/");
823 	struct virtio_net *dev;
824 	int vid, ret;
825 
826 	for (vid = 0; vid < RTE_MAX_VHOST_DEVICE; vid++) {
827 		dev = vhost_devices[vid];
828 
829 		if (dev == NULL)
830 			continue;
831 
832 		if (!strcmp(path, dev->ifname))
833 			break;
834 	}
835 
836 	if (vid == RTE_MAX_VHOST_DEVICE)
837 		return -1;
838 
839 	if (dev->reconnect_log)
840 		munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
841 
842 	vduse_device_stop(dev);
843 
844 	fdset_del(vduse.fdset, dev->vduse_dev_fd);
845 
846 	if (dev->vduse_dev_fd >= 0) {
847 		close(dev->vduse_dev_fd);
848 		dev->vduse_dev_fd = -1;
849 	}
850 
851 	if (dev->vduse_ctrl_fd >= 0) {
852 		char reconnect_file[PATH_MAX];
853 
854 		ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
855 		if (ret) {
856 			VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
857 					strerror(errno));
858 		} else {
859 			/*
860 			 * VDUSE device was no more attached to the vDPA bus,
861 			 * so we can remove the reconnect file.
862 			 */
863 			ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
864 					vduse_reconnect_dir, name);
865 			if (ret < 0 || ret == sizeof(reconnect_file))
866 				VHOST_CONFIG_LOG(name, ERR,
867 						"Failed to create vduse reconnect path name");
868 			else
869 				unlink(reconnect_file);
870 		}
871 
872 		close(dev->vduse_ctrl_fd);
873 		dev->vduse_ctrl_fd = -1;
874 	}
875 
876 	vhost_destroy_device(vid);
877 
878 	return 0;
879 }
880