xref: /dpdk/lib/vhost/vduse.c (revision 7917b0d38e92e8b9ec5a870415b791420e10f11a)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2023 Red Hat, Inc.
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <unistd.h>
8 #include <fcntl.h>
9 
10 
11 #include <linux/vduse.h>
12 #include <linux/virtio_net.h>
13 
14 #include <sys/ioctl.h>
15 #include <sys/mman.h>
16 #include <sys/stat.h>
17 
18 #include <rte_common.h>
19 #include <rte_thread.h>
20 
21 #include "fd_man.h"
22 #include "iotlb.h"
23 #include "vduse.h"
24 #include "vhost.h"
25 #include "virtio_net_ctrl.h"
26 
/* API version passed to the VDUSE_SET_API_VERSION ioctl in vduse_device_create(). */
#define VHOST_VDUSE_API_VERSION 0
/* Path of the VDUSE control character device opened to create/destroy devices. */
#define VDUSE_CTRL_PATH "/dev/vduse/control"

/* File-wide VDUSE state. */
struct vduse {
	/* fdset on which the device and control queue event handlers are registered. */
	struct fdset *fdset;
};

/* Single instance; its fdset is created on first use by vduse_device_create(). */
static struct vduse vduse;
35 
/*
 * Printable names of the VDUSE request types, indexed by the request type
 * value handed to vduse_req_id_to_str().
 */
static const char * const vduse_reqs_str[] = {
	"VDUSE_GET_VQ_STATE",
	"VDUSE_SET_STATUS",
	"VDUSE_UPDATE_IOTLB",
};

/*
 * Return the printable name of request type `id`, or "Unknown" when `id`
 * is past the end of vduse_reqs_str.
 *
 * The argument is parenthesized so that expressions with low-precedence
 * operators expand correctly. It is evaluated twice, so it must not have
 * side effects.
 */
#define vduse_req_id_to_str(id) \
	((id) < RTE_DIM(vduse_reqs_str) ? \
	vduse_reqs_str[(id)] : "Unknown")
45 
46 static int
47 vduse_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
48 {
49 	return ioctl(dev->vduse_dev_fd, VDUSE_VQ_INJECT_IRQ, &vq->index);
50 }
51 
/*
 * Backend callback invoked when an IOTLB entry is removed: unmap the
 * region that vduse_iotlb_miss() mapped for it. The mapping length is
 * offset + size, matching the length used at mmap() time.
 */
static void
vduse_iotlb_remove_notify(uint64_t addr, uint64_t offset, uint64_t size)
{
	void *base = (void *)(uintptr_t)addr;
	uint64_t length = offset + size;

	munmap(base, length);
}
57 
58 static int
59 vduse_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm __rte_unused)
60 {
61 	struct vduse_iotlb_entry entry;
62 	uint64_t size, page_size;
63 	struct stat stat;
64 	void *mmap_addr;
65 	int fd, ret;
66 
67 	entry.start = iova;
68 	entry.last = iova + 1;
69 
70 	ret = ioctl(dev->vduse_dev_fd, VDUSE_IOTLB_GET_FD, &entry);
71 	if (ret < 0) {
72 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get IOTLB entry for 0x%" PRIx64,
73 				iova);
74 		return -1;
75 	}
76 
77 	fd = ret;
78 
79 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "New IOTLB entry:");
80 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tIOVA: %" PRIx64 " - %" PRIx64,
81 			(uint64_t)entry.start, (uint64_t)entry.last);
82 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\toffset: %" PRIx64, (uint64_t)entry.offset);
83 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tfd: %d", fd);
84 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tperm: %x", entry.perm);
85 
86 	size = entry.last - entry.start + 1;
87 	mmap_addr = mmap(0, size + entry.offset, entry.perm, MAP_SHARED, fd, 0);
88 	if (!mmap_addr) {
89 		VHOST_CONFIG_LOG(dev->ifname, ERR,
90 				"Failed to mmap IOTLB entry for 0x%" PRIx64, iova);
91 		ret = -1;
92 		goto close_fd;
93 	}
94 
95 	ret = fstat(fd, &stat);
96 	if (ret < 0) {
97 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get page size.");
98 		munmap(mmap_addr, entry.offset + size);
99 		goto close_fd;
100 	}
101 	page_size = (uint64_t)stat.st_blksize;
102 
103 	vhost_user_iotlb_cache_insert(dev, entry.start, (uint64_t)(uintptr_t)mmap_addr,
104 		entry.offset, size, page_size, entry.perm);
105 
106 	ret = 0;
107 close_fd:
108 	close(fd);
109 
110 	return ret;
111 }
112 
/* Backend callbacks handed to vhost_new_device() for VDUSE devices. */
static struct vhost_backend_ops vduse_backend_ops = {
	.iotlb_miss = vduse_iotlb_miss,
	.iotlb_remove_notify = vduse_iotlb_remove_notify,
	.inject_irq = vduse_inject_irq,
};
118 
119 static void
120 vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
121 {
122 	struct virtio_net *dev = arg;
123 	uint64_t buf;
124 	int ret;
125 
126 	ret = read(fd, &buf, sizeof(buf));
127 	if (ret < 0) {
128 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read control queue event: %s",
129 				strerror(errno));
130 		return;
131 	}
132 
133 	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "Control queue kicked");
134 	if (virtio_net_ctrl_handle(dev))
135 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to handle ctrl request");
136 }
137 
138 static void
139 vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
140 {
141 	struct vhost_virtqueue *vq = dev->virtqueue[index];
142 	struct vhost_vring_addr *ra = &vq->ring_addrs;
143 	struct vduse_vq_info vq_info;
144 	struct vduse_vq_eventfd vq_efd;
145 	int ret;
146 
147 	vq_info.index = index;
148 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_GET_INFO, &vq_info);
149 	if (ret) {
150 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get VQ %u info: %s",
151 				index, strerror(errno));
152 		return;
153 	}
154 
155 	if (reconnect) {
156 		vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
157 		vq->last_used_idx = vq->reconnect_log->last_avail_idx;
158 	} else {
159 		vq->last_avail_idx = vq_info.split.avail_index;
160 		vq->last_used_idx = vq_info.split.avail_index;
161 	}
162 	vq->size = vq_info.num;
163 	vq->ready = true;
164 	vq->enabled = vq_info.ready;
165 	ra->desc_user_addr = vq_info.desc_addr;
166 	ra->avail_user_addr = vq_info.driver_addr;
167 	ra->used_user_addr = vq_info.device_addr;
168 	VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
169 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
170 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
171 			(unsigned long long)vq_info.desc_addr);
172 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdriver_addr: %llx",
173 			(unsigned long long)vq_info.driver_addr);
174 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdevice_addr: %llx",
175 			(unsigned long long)vq_info.device_addr);
176 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq->last_avail_idx);
177 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tused_idx: %u", vq->last_used_idx);
178 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
179 	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
180 	if (vq->kickfd < 0) {
181 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
182 				index, strerror(errno));
183 		vq->kickfd = VIRTIO_INVALID_EVENTFD;
184 		return;
185 	}
186 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tkick fd: %d", vq->kickfd);
187 
188 	vq->shadow_used_split = rte_malloc_socket(NULL,
189 				vq->size * sizeof(struct vring_used_elem),
190 				RTE_CACHE_LINE_SIZE, 0);
191 	vq->batch_copy_elems = rte_malloc_socket(NULL,
192 				vq->size * sizeof(struct batch_copy_elem),
193 				RTE_CACHE_LINE_SIZE, 0);
194 
195 	rte_rwlock_write_lock(&vq->access_lock);
196 	vhost_user_iotlb_rd_lock(vq);
197 	if (vring_translate(dev, vq))
198 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to translate vring %d addresses",
199 				index);
200 
201 	if (vhost_enable_guest_notification(dev, vq, 0))
202 		VHOST_CONFIG_LOG(dev->ifname, ERR,
203 				"Failed to disable guest notifications on vring %d",
204 				index);
205 	vhost_user_iotlb_rd_unlock(vq);
206 	rte_rwlock_write_unlock(&vq->access_lock);
207 
208 	vq_efd.index = index;
209 	vq_efd.fd = vq->kickfd;
210 
211 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
212 	if (ret) {
213 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to setup kickfd for VQ %u: %s",
214 				index, strerror(errno));
215 		close(vq->kickfd);
216 		vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
217 		return;
218 	}
219 
220 	if (vq == dev->cvq) {
221 		ret = fdset_add(vduse.fdset, vq->kickfd, vduse_control_queue_event, NULL, dev);
222 		if (ret) {
223 			VHOST_CONFIG_LOG(dev->ifname, ERR,
224 					"Failed to setup kickfd handler for VQ %u: %s",
225 					index, strerror(errno));
226 			vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;
227 			ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
228 			close(vq->kickfd);
229 			vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
230 		}
231 		vhost_enable_guest_notification(dev, vq, 1);
232 		VHOST_CONFIG_LOG(dev->ifname, INFO, "Ctrl queue event handler installed");
233 	}
234 }
235 
236 static void
237 vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
238 {
239 	struct vhost_virtqueue *vq = dev->virtqueue[index];
240 	struct vduse_vq_eventfd vq_efd;
241 	int ret;
242 
243 	if (vq == dev->cvq && vq->kickfd >= 0)
244 		fdset_del(vduse.fdset, vq->kickfd);
245 
246 	vq_efd.index = index;
247 	vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;
248 
249 	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
250 	if (ret)
251 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to cleanup kickfd for VQ %u: %s",
252 				index, strerror(errno));
253 
254 	close(vq->kickfd);
255 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
256 
257 	rte_rwlock_write_lock(&vq->access_lock);
258 	vring_invalidate(dev, vq);
259 	rte_rwlock_write_unlock(&vq->access_lock);
260 
261 	rte_free(vq->batch_copy_elems);
262 	vq->batch_copy_elems = NULL;
263 
264 	rte_free(vq->shadow_used_split);
265 	vq->shadow_used_split = NULL;
266 
267 	vq->enabled = false;
268 	vq->ready = false;
269 	vq->size = 0;
270 	vq->last_used_idx = 0;
271 	vq->last_avail_idx = 0;
272 }
273 
274 static void
275 vduse_device_start(struct virtio_net *dev, bool reconnect)
276 {
277 	unsigned int i, ret;
278 
279 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Starting device...");
280 
281 	dev->notify_ops = vhost_driver_callback_get(dev->ifname);
282 	if (!dev->notify_ops) {
283 		VHOST_CONFIG_LOG(dev->ifname, ERR,
284 				"Failed to get callback ops for driver");
285 		return;
286 	}
287 
288 	ret = ioctl(dev->vduse_dev_fd, VDUSE_DEV_GET_FEATURES, &dev->features);
289 	if (ret) {
290 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get features: %s",
291 				strerror(errno));
292 		return;
293 	}
294 
295 	if (reconnect && dev->features != dev->reconnect_log->features) {
296 		VHOST_CONFIG_LOG(dev->ifname, ERR,
297 				"Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,
298 				dev->reconnect_log->features, dev->features);
299 		return;
300 	}
301 
302 	dev->reconnect_log->features = dev->features;
303 
304 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
305 		dev->features);
306 
307 	if (dev->features &
308 		((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
309 		 (1ULL << VIRTIO_F_VERSION_1) |
310 		 (1ULL << VIRTIO_F_RING_PACKED))) {
311 		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
312 	} else {
313 		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
314 	}
315 
316 	for (i = 0; i < dev->nr_vring; i++)
317 		vduse_vring_setup(dev, i, reconnect);
318 
319 	dev->flags |= VIRTIO_DEV_READY;
320 
321 	if (dev->notify_ops->new_device(dev->vid) == 0)
322 		dev->flags |= VIRTIO_DEV_RUNNING;
323 
324 	for (i = 0; i < dev->nr_vring; i++) {
325 		struct vhost_virtqueue *vq = dev->virtqueue[i];
326 
327 		if (vq == dev->cvq)
328 			continue;
329 
330 		if (dev->notify_ops->vring_state_changed)
331 			dev->notify_ops->vring_state_changed(dev->vid, i, vq->enabled);
332 	}
333 }
334 
335 static void
336 vduse_device_stop(struct virtio_net *dev)
337 {
338 	unsigned int i;
339 
340 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Stopping device...");
341 
342 	vhost_destroy_device_notify(dev);
343 
344 	dev->flags &= ~VIRTIO_DEV_READY;
345 
346 	for (i = 0; i < dev->nr_vring; i++)
347 		vduse_vring_cleanup(dev, i);
348 
349 	vhost_user_iotlb_flush_all(dev);
350 }
351 
352 static void
353 vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
354 {
355 	struct virtio_net *dev = arg;
356 	struct vduse_dev_request req;
357 	struct vduse_dev_response resp;
358 	struct vhost_virtqueue *vq;
359 	uint8_t old_status = dev->status;
360 	int ret;
361 
362 	memset(&resp, 0, sizeof(resp));
363 
364 	ret = read(fd, &req, sizeof(req));
365 	if (ret < 0) {
366 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read request: %s",
367 				strerror(errno));
368 		return;
369 	} else if (ret < (int)sizeof(req)) {
370 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Incomplete to read request %d", ret);
371 		return;
372 	}
373 
374 	VHOST_CONFIG_LOG(dev->ifname, INFO, "New request: %s (%u)",
375 			vduse_req_id_to_str(req.type), req.type);
376 
377 	switch (req.type) {
378 	case VDUSE_GET_VQ_STATE:
379 		vq = dev->virtqueue[req.vq_state.index];
380 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tvq index: %u, avail_index: %u",
381 				req.vq_state.index, vq->last_avail_idx);
382 		resp.vq_state.split.avail_index = vq->last_avail_idx;
383 		resp.result = VDUSE_REQ_RESULT_OK;
384 		break;
385 	case VDUSE_SET_STATUS:
386 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnew status: 0x%08x",
387 				req.s.status);
388 		old_status = dev->status;
389 		dev->status = req.s.status;
390 		dev->reconnect_log->status = dev->status;
391 		resp.result = VDUSE_REQ_RESULT_OK;
392 		break;
393 	case VDUSE_UPDATE_IOTLB:
394 		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tIOVA range: %" PRIx64 " - %" PRIx64,
395 				(uint64_t)req.iova.start, (uint64_t)req.iova.last);
396 		vhost_user_iotlb_cache_remove(dev, req.iova.start,
397 				req.iova.last - req.iova.start + 1);
398 		resp.result = VDUSE_REQ_RESULT_OK;
399 		break;
400 	default:
401 		resp.result = VDUSE_REQ_RESULT_FAILED;
402 		break;
403 	}
404 
405 	resp.request_id = req.request_id;
406 
407 	ret = write(dev->vduse_dev_fd, &resp, sizeof(resp));
408 	if (ret != sizeof(resp)) {
409 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to write response %s",
410 				strerror(errno));
411 		return;
412 	}
413 
414 	if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
415 		if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
416 			vduse_device_start(dev, false);
417 		else
418 			vduse_device_stop(dev);
419 	}
420 
421 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Request %s (%u) handled successfully",
422 			vduse_req_id_to_str(req.type), req.type);
423 }
424 
425 static char vduse_reconnect_dir[PATH_MAX];
426 static bool vduse_reconnect_path_set;
427 
428 static int
429 vduse_reconnect_path_init(void)
430 {
431 	const char *directory;
432 	int ret;
433 
434 	/* from RuntimeDirectory= see systemd.exec */
435 	directory = getenv("RUNTIME_DIRECTORY");
436 	if (directory == NULL) {
437 		/*
438 		 * Used standard convention defined in
439 		 * XDG Base Directory Specification and
440 		 * Filesystem Hierarchy Standard.
441 		 */
442 		if (getuid() == 0)
443 			directory = "/var/run";
444 		else
445 			directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
446 	}
447 
448 	ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/vduse",
449 			directory);
450 	if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
451 		VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
452 		return -1;
453 	}
454 
455 	ret = mkdir(vduse_reconnect_dir, 0700);
456 	if (ret < 0 && errno != EEXIST) {
457 		VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
458 				vduse_reconnect_dir, strerror(errno));
459 		return -1;
460 	}
461 
462 	VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
463 			vduse_reconnect_dir);
464 
465 	return 0;
466 }
467 
/*
 * One-shot fdset handler used on reconnection: starts the device passed
 * as `arg` with the reconnect flag set, then closes the triggering file
 * descriptor and sets *remove so that it is dropped from the fdset.
 */
static void
vduse_reconnect_handler(int fd, void *arg, int *remove)
{
	vduse_device_start((struct virtio_net *)arg, true);

	/* This handler is only meant to run once. */
	close(fd);
	*remove = 1;
}
478 
479 int
480 vduse_device_create(const char *path, bool compliant_ol_flags)
481 {
482 	int control_fd, dev_fd, vid, ret, reco_fd;
483 	uint32_t i, max_queue_pairs, total_queues;
484 	struct virtio_net *dev;
485 	struct virtio_net_config vnet_config = {{ 0 }};
486 	uint64_t ver = VHOST_VDUSE_API_VERSION;
487 	uint64_t features;
488 	struct vduse_dev_config *dev_config = NULL;
489 	const char *name = path + strlen("/dev/vduse/");
490 	char reconnect_file[PATH_MAX];
491 	struct vhost_reconnect_data *reconnect_log = NULL;
492 	bool reconnect = false;
493 
494 	if (vduse.fdset == NULL) {
495 		vduse.fdset = fdset_init("vduse-evt");
496 		if (vduse.fdset == NULL) {
497 			VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE fdset");
498 			return -1;
499 		}
500 	}
501 
502 	if (vduse_reconnect_path_set == false) {
503 		if (vduse_reconnect_path_init() < 0) {
504 			VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
505 			return -1;
506 		}
507 		vduse_reconnect_path_set = true;
508 	}
509 
510 	ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
511 	if (ret < 0 || ret == sizeof(reconnect_file)) {
512 		VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
513 		return -1;
514 	}
515 
516 	control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
517 	if (control_fd < 0) {
518 		VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
519 				VDUSE_CTRL_PATH, strerror(errno));
520 		return -1;
521 	}
522 
523 	if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
524 		VHOST_CONFIG_LOG(name, ERR, "Failed to set API version: %" PRIu64 ": %s",
525 				ver, strerror(errno));
526 		ret = -1;
527 		goto out_ctrl_close;
528 	}
529 
530 	ret = rte_vhost_driver_get_features(path, &features);
531 	if (ret < 0) {
532 		VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
533 		goto out_free;
534 	}
535 
536 	ret = rte_vhost_driver_get_queue_num(path, &max_queue_pairs);
537 	if (ret < 0) {
538 		VHOST_CONFIG_LOG(name, ERR, "Failed to get max queue pairs");
539 		goto out_free;
540 	}
541 
542 	VHOST_CONFIG_LOG(path, INFO, "VDUSE max queue pairs: %u", max_queue_pairs);
543 	total_queues = max_queue_pairs * 2;
544 
545 	if (max_queue_pairs == 1)
546 		features &= ~(RTE_BIT64(VIRTIO_NET_F_CTRL_VQ) | RTE_BIT64(VIRTIO_NET_F_MQ));
547 	else
548 		total_queues += 1; /* Includes ctrl queue */
549 
550 	if (access(path, F_OK) == 0) {
551 		VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
552 		reconnect = true;
553 
554 		reco_fd = open(reconnect_file, O_RDWR, 0600);
555 		if (reco_fd < 0) {
556 			if (errno == ENOENT)
557 				VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
558 						reconnect_file);
559 			else
560 				VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
561 						reconnect_file, strerror(errno));
562 			ret = -1;
563 			goto out_ctrl_close;
564 		}
565 
566 		reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
567 				MAP_SHARED, reco_fd, 0);
568 		close(reco_fd);
569 		if (reconnect_log == MAP_FAILED) {
570 			VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
571 					reconnect_file, strerror(errno));
572 			ret = -1;
573 			goto out_ctrl_close;
574 		}
575 
576 		if (reconnect_log->version != VHOST_RECONNECT_VERSION) {
577 			VHOST_CONFIG_LOG(name, ERR,
578 					"Version mismatch between backend (0x%x) & reconnection file (0x%x)",
579 					VHOST_RECONNECT_VERSION, reconnect_log->version);
580 		}
581 
582 		if ((reconnect_log->features & features) != reconnect_log->features) {
583 			VHOST_CONFIG_LOG(name, ERR,
584 					"Features mismatch between backend (0x%" PRIx64 ") & reconnection file (0x%" PRIx64 ")",
585 					features, reconnect_log->features);
586 			ret = -1;
587 			goto out_ctrl_close;
588 		}
589 
590 		if (reconnect_log->nr_vrings != total_queues) {
591 			VHOST_CONFIG_LOG(name, ERR,
592 					"Queues number mismatch between backend (%u) and reconnection file (%u)",
593 					total_queues, reconnect_log->nr_vrings);
594 			ret = -1;
595 			goto out_ctrl_close;
596 		}
597 	} else {
598 		reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
599 		if (reco_fd < 0) {
600 			if (errno == EEXIST) {
601 				VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
602 						reconnect_file);
603 			} else {
604 				VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
605 						reconnect_file, strerror(errno));
606 			}
607 			ret = -1;
608 			goto out_ctrl_close;
609 		}
610 
611 		ret = ftruncate(reco_fd, sizeof(*reconnect_log));
612 		if (ret < 0) {
613 			VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
614 					reconnect_file, strerror(errno));
615 			close(reco_fd);
616 			goto out_ctrl_close;
617 		}
618 
619 		reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
620 					MAP_SHARED, reco_fd, 0);
621 		close(reco_fd);
622 		if (reconnect_log == MAP_FAILED) {
623 			VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
624 					reconnect_file, strerror(errno));
625 			ret = -1;
626 			goto out_ctrl_close;
627 		}
628 
629 		reconnect_log->version = VHOST_RECONNECT_VERSION;
630 
631 		dev_config = malloc(offsetof(struct vduse_dev_config, config) +
632 				sizeof(vnet_config));
633 		if (!dev_config) {
634 			VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
635 			ret = -1;
636 			goto out_ctrl_close;
637 		}
638 
639 		vnet_config.max_virtqueue_pairs = max_queue_pairs;
640 		memset(dev_config, 0, sizeof(struct vduse_dev_config));
641 
642 		rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
643 		dev_config->device_id = VIRTIO_ID_NET;
644 		dev_config->vendor_id = 0;
645 		dev_config->features = features;
646 		dev_config->vq_num = total_queues;
647 		dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
648 		dev_config->config_size = sizeof(struct virtio_net_config);
649 		memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
650 
651 		ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
652 		if (ret < 0) {
653 			VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
654 					strerror(errno));
655 			goto out_free;
656 		}
657 
658 		memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
659 		reconnect_log->nr_vrings = total_queues;
660 		free(dev_config);
661 		dev_config = NULL;
662 	}
663 
664 	dev_fd = open(path, O_RDWR);
665 	if (dev_fd < 0) {
666 		VHOST_CONFIG_LOG(name, ERR, "Failed to open device %s: %s",
667 				path, strerror(errno));
668 		ret = -1;
669 		goto out_dev_close;
670 	}
671 
672 	ret = fcntl(dev_fd, F_SETFL, O_NONBLOCK);
673 	if (ret < 0) {
674 		VHOST_CONFIG_LOG(name, ERR, "Failed to set chardev as non-blocking: %s",
675 				strerror(errno));
676 		goto out_dev_close;
677 	}
678 
679 	vid = vhost_new_device(&vduse_backend_ops);
680 	if (vid < 0) {
681 		VHOST_CONFIG_LOG(name, ERR, "Failed to create new Vhost device");
682 		ret = -1;
683 		goto out_dev_close;
684 	}
685 
686 	dev = get_device(vid);
687 	if (!dev) {
688 		ret = -1;
689 		goto out_dev_close;
690 	}
691 
692 	strncpy(dev->ifname, path, IF_NAME_SZ - 1);
693 	dev->vduse_ctrl_fd = control_fd;
694 	dev->vduse_dev_fd = dev_fd;
695 	dev->reconnect_log = reconnect_log;
696 	if (reconnect)
697 		dev->status = dev->reconnect_log->status;
698 
699 	vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
700 
701 	for (i = 0; i < total_queues; i++) {
702 		struct vduse_vq_config vq_cfg = { 0 };
703 		struct vhost_virtqueue *vq;
704 
705 		ret = alloc_vring_queue(dev, i);
706 		if (ret) {
707 			VHOST_CONFIG_LOG(name, ERR, "Failed to alloc vring %d metadata", i);
708 			goto out_dev_destroy;
709 		}
710 
711 		vq = dev->virtqueue[i];
712 		vq->reconnect_log = &reconnect_log->vring[i];
713 
714 		if (reconnect)
715 			continue;
716 
717 		vq_cfg.index = i;
718 		vq_cfg.max_size = 1024;
719 
720 		ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
721 		if (ret) {
722 			VHOST_CONFIG_LOG(name, ERR, "Failed to set-up VQ %d", i);
723 			goto out_dev_destroy;
724 		}
725 	}
726 
727 	dev->cvq = dev->virtqueue[max_queue_pairs * 2];
728 
729 	ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler, NULL, dev);
730 	if (ret) {
731 		VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
732 				dev->vduse_dev_fd);
733 		goto out_dev_destroy;
734 	}
735 
736 	if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
737 		/*
738 		 * Make vduse_device_start() being executed in the same
739 		 * context for both reconnection and fresh startup.
740 		 */
741 		reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
742 		if (reco_fd < 0) {
743 			VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
744 					strerror(errno));
745 			ret = -1;
746 			goto out_dev_destroy;
747 		}
748 
749 		ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
750 		if (ret) {
751 			VHOST_CONFIG_LOG(name, ERR, "Failed to add reconnect fd %d to vduse fdset",
752 					reco_fd);
753 			goto out_dev_destroy;
754 		}
755 
756 		ret = eventfd_write(reco_fd, (eventfd_t)1);
757 		if (ret < 0) {
758 			VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
759 			goto out_dev_destroy;
760 		}
761 	}
762 
763 	return 0;
764 
765 out_dev_destroy:
766 	vhost_destroy_device(vid);
767 out_dev_close:
768 	if (dev_fd >= 0)
769 		close(dev_fd);
770 	ioctl(control_fd, VDUSE_DESTROY_DEV, name);
771 out_free:
772 	free(dev_config);
773 out_ctrl_close:
774 	close(control_fd);
775 
776 	return ret;
777 }
778 
779 int
780 vduse_device_destroy(const char *path)
781 {
782 	const char *name = path + strlen("/dev/vduse/");
783 	struct virtio_net *dev;
784 	int vid, ret;
785 
786 	for (vid = 0; vid < RTE_MAX_VHOST_DEVICE; vid++) {
787 		dev = vhost_devices[vid];
788 
789 		if (dev == NULL)
790 			continue;
791 
792 		if (!strcmp(path, dev->ifname))
793 			break;
794 	}
795 
796 	if (vid == RTE_MAX_VHOST_DEVICE)
797 		return -1;
798 
799 	if (dev->reconnect_log)
800 		munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
801 
802 	vduse_device_stop(dev);
803 
804 	fdset_del(vduse.fdset, dev->vduse_dev_fd);
805 
806 	if (dev->vduse_dev_fd >= 0) {
807 		close(dev->vduse_dev_fd);
808 		dev->vduse_dev_fd = -1;
809 	}
810 
811 	if (dev->vduse_ctrl_fd >= 0) {
812 		char reconnect_file[PATH_MAX];
813 
814 		ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
815 		if (ret) {
816 			VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
817 					strerror(errno));
818 		} else {
819 			/*
820 			 * VDUSE device was no more attached to the vDPA bus,
821 			 * so we can remove the reconnect file.
822 			 */
823 			ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
824 					vduse_reconnect_dir, name);
825 			if (ret < 0 || ret == sizeof(reconnect_file))
826 				VHOST_CONFIG_LOG(name, ERR,
827 						"Failed to create vduse reconnect path name");
828 			else
829 				unlink(reconnect_file);
830 		}
831 
832 		close(dev->vduse_ctrl_fd);
833 		dev->vduse_ctrl_fd = -1;
834 	}
835 
836 	vhost_destroy_device(vid);
837 
838 	return 0;
839 }
840