xref: /dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c (revision b53d106d34b5c638f5a2cbdfee0da5bd42d4383f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <fcntl.h>
8 #include <string.h>
9 #include <errno.h>
10 #include <sys/mman.h>
11 #include <unistd.h>
12 #include <sys/eventfd.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 
16 #include <rte_alarm.h>
17 #include <rte_string_fns.h>
18 #include <rte_eal_memconfig.h>
19 
20 #include "vhost.h"
21 #include "virtio_user_dev.h"
22 #include "../virtio_ethdev.h"
23 
24 #define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"
25 
26 const char * const virtio_user_backend_strings[] = {
27 	[VIRTIO_USER_BACKEND_UNKNOWN] = "VIRTIO_USER_BACKEND_UNKNOWN",
28 	[VIRTIO_USER_BACKEND_VHOST_USER] = "VHOST_USER",
29 	[VIRTIO_USER_BACKEND_VHOST_KERNEL] = "VHOST_NET",
30 	[VIRTIO_USER_BACKEND_VHOST_VDPA] = "VHOST_VDPA",
31 };
32 
33 static int
34 virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
35 {
36 	/* Of all per-virtqueue messages, make sure VHOST_SET_VRING_CALL comes
37 	 * first, because vhost relies on it to allocate the virtqueue
38 	 * pair.
39 	 */
40 	struct vhost_vring_file file;
41 	int ret;
42 
43 	file.index = queue_sel;
44 	file.fd = dev->callfds[queue_sel];
45 	ret = dev->ops->set_vring_call(dev, &file);
46 	if (ret < 0) {
47 		PMD_INIT_LOG(ERR, "(%s) Failed to create queue %u", dev->path, queue_sel);
48 		return -1;
49 	}
50 
51 	return 0;
52 }
53 
54 static int
55 virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
56 {
57 	int ret;
58 	struct vhost_vring_file file;
59 	struct vhost_vring_state state;
60 	struct vring *vring = &dev->vrings[queue_sel];
61 	struct vring_packed *pq_vring = &dev->packed_vrings[queue_sel];
62 	struct vhost_vring_addr addr = {
63 		.index = queue_sel,
64 		.log_guest_addr = 0,
65 		.flags = 0, /* disable log */
66 	};
67 
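	/* Ring addresses are virtio-user process virtual addresses;
	 * the backend resolves them through the shared memory table.
	 */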
68 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
69 		addr.desc_user_addr =
70 			(uint64_t)(uintptr_t)pq_vring->desc;
71 		addr.avail_user_addr =
72 			(uint64_t)(uintptr_t)pq_vring->driver;
73 		addr.used_user_addr =
74 			(uint64_t)(uintptr_t)pq_vring->device;
75 	} else {
76 		addr.desc_user_addr = (uint64_t)(uintptr_t)vring->desc;
77 		addr.avail_user_addr = (uint64_t)(uintptr_t)vring->avail;
78 		addr.used_user_addr = (uint64_t)(uintptr_t)vring->used;
79 	}
80 
81 	state.index = queue_sel;
82 	state.num = vring->num;
83 	ret = dev->ops->set_vring_num(dev, &state);
84 	if (ret < 0)
85 		goto err;
86 
87 	state.index = queue_sel;
88 	state.num = 0; /* no reservation */
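	/* For a packed ring, bit 15 of the base carries the initial
	 * avail wrap counter, which starts at 1.
	 */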
89 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
90 		state.num |= (1 << 15);
91 	ret = dev->ops->set_vring_base(dev, &state);
92 	if (ret < 0)
93 		goto err;
94 
95 	ret = dev->ops->set_vring_addr(dev, &addr);
96 	if (ret < 0)
97 		goto err;
98 
99 	/* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK comes
100 	 * last, because vhost uses it to decide whether the virtio
101 	 * device is ready.
102 	 */
103 	file.index = queue_sel;
104 	file.fd = dev->kickfds[queue_sel];
105 	ret = dev->ops->set_vring_kick(dev, &file);
106 	if (ret < 0)
107 		goto err;
108 
109 	return 0;
110 err:
111 	PMD_INIT_LOG(ERR, "(%s) Failed to kick queue %u", dev->path, queue_sel);
112 
113 	return -1;
114 }
115 
116 static int
117 virtio_user_queue_setup(struct virtio_user_dev *dev,
118 			int (*fn)(struct virtio_user_dev *, uint32_t))
119 {
120 	uint32_t i, queue_sel;
121 
122 	for (i = 0; i < dev->max_queue_pairs; ++i) {
123 		queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
124 		if (fn(dev, queue_sel) < 0) {
125 			PMD_DRV_LOG(ERR, "(%s) setup rx vq %u failed", dev->path, i);
126 			return -1;
127 		}
128 	}
129 	for (i = 0; i < dev->max_queue_pairs; ++i) {
130 		queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
131 		if (fn(dev, queue_sel) < 0) {
132 			PMD_DRV_LOG(ERR, "(%s) setup tx vq %u failed", dev->path, i);
133 			return -1;
134 		}
135 	}
136 
137 	return 0;
138 }
139 
140 int
141 virtio_user_dev_set_features(struct virtio_user_dev *dev)
142 {
143 	uint64_t features;
144 	int ret = -1;
145 
146 	pthread_mutex_lock(&dev->mutex);
147 
148 	/* Step 0: tell vhost to create queues */
149 	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
150 		goto error;
151 
152 	features = dev->features;
153 
154 	/* Strip VIRTIO_NET_F_MAC, as the MAC address is handled in vdev init */
155 	features &= ~(1ull << VIRTIO_NET_F_MAC);
156 	/* Strip VIRTIO_NET_F_CTRL_VQ, as the backend does not need to know about it */
157 	features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
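	/* Strip VIRTIO_NET_F_STATUS as well; the status byte is handled by
	 * virtio-user itself, not by the backend.
	 */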
158 	features &= ~(1ull << VIRTIO_NET_F_STATUS);
159 	ret = dev->ops->set_features(dev, features);
160 	if (ret < 0)
161 		goto error;
162 	PMD_DRV_LOG(INFO, "(%s) set features: 0x%" PRIx64, dev->path, features);
163 error:
164 	pthread_mutex_unlock(&dev->mutex);
165 
166 	return ret;
167 }
168 
169 int
170 virtio_user_start_device(struct virtio_user_dev *dev)
171 {
172 	int ret;
173 
174 	/*
175 	 * XXX workaround!
176 	 *
177 	 * We need to make sure that the locks will be
178 	 * taken in the correct order to avoid deadlocks.
179 	 *
180 	 * Before releasing this lock, this thread should
181 	 * not trigger any memory hotplug events.
182 	 *
183 	 * This is a temporary workaround, and should be
184 	 * replaced when we get proper support from the
185 	 * memory subsystem in the future.
186 	 */
187 	rte_mcfg_mem_read_lock();
188 	pthread_mutex_lock(&dev->mutex);
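	/* Step 0 (queue creation) and step 1 (feature negotiation) are
	 * done earlier, in virtio_user_dev_set_features().
	 */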
189 
190 	/* Step 2: share memory regions */
191 	ret = dev->ops->set_memory_table(dev);
192 	if (ret < 0)
193 		goto error;
194 
195 	/* Step 3: kick queues */
196 	ret = virtio_user_queue_setup(dev, virtio_user_kick_queue);
197 	if (ret < 0)
198 		goto error;
199 
200 	/* Step 4: enable queues.
201 	 * We enable the first queue pair by default.
202 	 */
203 	ret = dev->ops->enable_qp(dev, 0, 1);
204 	if (ret < 0)
205 		goto error;
206 
207 	dev->started = true;
208 
209 	pthread_mutex_unlock(&dev->mutex);
210 	rte_mcfg_mem_read_unlock();
211 
212 	return 0;
213 error:
214 	pthread_mutex_unlock(&dev->mutex);
215 	rte_mcfg_mem_read_unlock();
216 
217 	PMD_INIT_LOG(ERR, "(%s) Failed to start device", dev->path);
218 
219 	/* TODO: free resources here, or let the caller check and clean up */
220 	return -1;
221 }
222 
223 int virtio_user_stop_device(struct virtio_user_dev *dev)
224 {
225 	struct vhost_vring_state state;
226 	uint32_t i;
227 	int ret;
228 
229 	pthread_mutex_lock(&dev->mutex);
230 	if (!dev->started)
231 		goto out;
232 
233 	for (i = 0; i < dev->max_queue_pairs; ++i) {
234 		ret = dev->ops->enable_qp(dev, i, 0);
235 		if (ret < 0)
236 			goto err;
237 	}
238 
239 	/* Stop the backend. */
240 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
241 		state.index = i;
242 		ret = dev->ops->get_vring_base(dev, &state);
243 		if (ret < 0) {
244 			PMD_DRV_LOG(ERR, "(%s) get_vring_base failed, index=%u", dev->path, i);
245 			goto err;
246 		}
247 	}
248 
249 	dev->started = false;
250 
251 out:
252 	pthread_mutex_unlock(&dev->mutex);
253 
254 	return 0;
255 err:
256 	pthread_mutex_unlock(&dev->mutex);
257 
258 	PMD_INIT_LOG(ERR, "(%s) Failed to stop device", dev->path);
259 
260 	return -1;
261 }
262 
263 int
264 virtio_user_dev_set_mac(struct virtio_user_dev *dev)
265 {
266 	int ret = 0;
267 
268 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
269 		return -ENOTSUP;
270 
271 	if (!dev->ops->set_config)
272 		return -ENOTSUP;
273 
274 	ret = dev->ops->set_config(dev, dev->mac_addr,
275 			offsetof(struct virtio_net_config, mac),
276 			RTE_ETHER_ADDR_LEN);
277 	if (ret)
278 		PMD_DRV_LOG(ERR, "(%s) Failed to set MAC address in device", dev->path);
279 
280 	return ret;
281 }
282 
283 int
284 virtio_user_dev_get_mac(struct virtio_user_dev *dev)
285 {
286 	int ret = 0;
287 
288 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
289 		return -ENOTSUP;
290 
291 	if (!dev->ops->get_config)
292 		return -ENOTSUP;
293 
294 	ret = dev->ops->get_config(dev, dev->mac_addr,
295 			offsetof(struct virtio_net_config, mac),
296 			RTE_ETHER_ADDR_LEN);
297 	if (ret)
298 		PMD_DRV_LOG(ERR, "(%s) Failed to get MAC address from device", dev->path);
299 
300 	return ret;
301 }
302 
303 static void
304 virtio_user_dev_init_mac(struct virtio_user_dev *dev, const char *mac)
305 {
306 	struct rte_ether_addr cmdline_mac;
307 	char buf[RTE_ETHER_ADDR_FMT_SIZE];
308 	int ret;
309 
310 	if (mac && rte_ether_unformat_addr(mac, &cmdline_mac) == 0) {
311 		/*
312 		 * A MAC address was passed on the command line; try to store
313 		 * it in the device if the device supports it. Otherwise fall
314 		 * back to the device's own address.
315 		 */
316 		memcpy(dev->mac_addr, &cmdline_mac, RTE_ETHER_ADDR_LEN);
317 		dev->mac_specified = 1;
318 
319 		/* Setting the MAC may fail; in that case, continue and read the device one */
320 		virtio_user_dev_set_mac(dev);
321 		ret = virtio_user_dev_get_mac(dev);
322 		if (ret == -ENOTSUP)
323 			goto out;
324 
325 		if (memcmp(&cmdline_mac, dev->mac_addr, RTE_ETHER_ADDR_LEN))
326 			PMD_DRV_LOG(INFO, "(%s) Device MAC update failed", dev->path);
327 	} else {
328 		ret = virtio_user_dev_get_mac(dev);
329 		if (ret) {
330 			PMD_DRV_LOG(ERR, "(%s) No valid MAC in devargs or device, using a random one",
331 					dev->path);
332 			return;
333 		}
334 
335 		dev->mac_specified = 1;
336 	}
337 out:
338 	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE,
339 			(struct rte_ether_addr *)dev->mac_addr);
340 	PMD_DRV_LOG(INFO, "(%s) MAC %s specified", dev->path, buf);
341 }
342 
343 static int
344 virtio_user_dev_init_notify(struct virtio_user_dev *dev)
345 {
346 	uint32_t i, j;
347 	int callfd;
348 	int kickfd;
349 
350 	for (i = 0; i < dev->max_queue_pairs * 2; i++) {
351 		/* An invalid fd would do, but some backends use the kickfd and
352 		 * callfd to decide whether the device is alive, so use real
353 		 * eventfds.
354 		 */
355 		callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
356 		if (callfd < 0) {
357 			PMD_DRV_LOG(ERR, "(%s) callfd error, %s", dev->path, strerror(errno));
358 			goto err;
359 		}
360 		kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
361 		if (kickfd < 0) {
362 			close(callfd);
363 			PMD_DRV_LOG(ERR, "(%s) kickfd error, %s", dev->path, strerror(errno));
364 			goto err;
365 		}
366 		dev->callfds[i] = callfd;
367 		dev->kickfds[i] = kickfd;
368 	}
369 
370 	return 0;
371 err:
372 	for (j = 0; j < i; j++) {
373 		if (dev->kickfds[j] >= 0) {
374 			close(dev->kickfds[j]);
375 			dev->kickfds[j] = -1;
376 		}
377 		if (dev->callfds[j] >= 0) {
378 			close(dev->callfds[j]);
379 			dev->callfds[j] = -1;
380 		}
381 	}
382 
383 	return -1;
384 }
385 
386 static void
387 virtio_user_dev_uninit_notify(struct virtio_user_dev *dev)
388 {
389 	uint32_t i;
390 
391 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
392 		if (dev->kickfds[i] >= 0) {
393 			close(dev->kickfds[i]);
394 			dev->kickfds[i] = -1;
395 		}
396 		if (dev->callfds[i] >= 0) {
397 			close(dev->callfds[i]);
398 			dev->callfds[i] = -1;
399 		}
400 	}
401 }
402 
403 static int
404 virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
405 {
406 	uint32_t i;
407 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
408 
409 	if (eth_dev->intr_handle == NULL) {
410 		eth_dev->intr_handle =
411 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
412 		if (eth_dev->intr_handle == NULL) {
413 			PMD_DRV_LOG(ERR, "(%s) failed to allocate intr_handle", dev->path);
414 			return -1;
415 		}
416 	}
417 
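	/* Expose the per-queue call fds to the ethdev interrupt framework
	 * so Rx interrupts can be serviced through the vdev interrupt handle.
	 */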
418 	for (i = 0; i < dev->max_queue_pairs; ++i) {
419 		if (rte_intr_efds_index_set(eth_dev->intr_handle, i,
420 				dev->callfds[i]))
421 			return -rte_errno;
422 	}
423 
424 	if (rte_intr_nb_efd_set(eth_dev->intr_handle, dev->max_queue_pairs))
425 		return -rte_errno;
426 
427 	if (rte_intr_max_intr_set(eth_dev->intr_handle,
428 			dev->max_queue_pairs + 1))
429 		return -rte_errno;
430 
431 	if (rte_intr_type_set(eth_dev->intr_handle, RTE_INTR_HANDLE_VDEV))
432 		return -rte_errno;
433 
434 	/* For a virtio vdev, there is no need to read the eventfd counter to clear the interrupt */
435 	if (rte_intr_efd_counter_size_set(eth_dev->intr_handle, 0))
436 		return -rte_errno;
437 
438 	if (rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev)))
439 		return -rte_errno;
440 
441 	return 0;
442 }
443 
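/*
 * Memory hotplug callback: when hugepages are added or removed, the memory
 * table shared with the backend becomes stale. Pause the active queue pairs,
 * push the updated memory table, then resume the queues.
 */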
444 static void
445 virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
446 			 const void *addr,
447 			 size_t len __rte_unused,
448 			 void *arg)
449 {
450 	struct virtio_user_dev *dev = arg;
451 	struct rte_memseg_list *msl;
452 	uint16_t i;
453 	int ret = 0;
454 
455 	/* ignore externally allocated memory */
456 	msl = rte_mem_virt2memseg_list(addr);
457 	if (msl->external)
458 		return;
459 
460 	pthread_mutex_lock(&dev->mutex);
461 
462 	if (!dev->started)
463 		goto exit;
464 
465 	/* Step 1: pause the active queues */
466 	for (i = 0; i < dev->queue_pairs; i++) {
467 		ret = dev->ops->enable_qp(dev, i, 0);
468 		if (ret < 0)
469 			goto exit;
470 	}
471 
472 	/* Step 2: update memory regions */
473 	ret = dev->ops->set_memory_table(dev);
474 	if (ret < 0)
475 		goto exit;
476 
477 	/* Step 3: resume the active queues */
478 	for (i = 0; i < dev->queue_pairs; i++) {
479 		ret = dev->ops->enable_qp(dev, i, 1);
480 		if (ret < 0)
481 			goto exit;
482 	}
483 
484 exit:
485 	pthread_mutex_unlock(&dev->mutex);
486 
487 	if (ret < 0)
488 		PMD_DRV_LOG(ERR, "(%s) Failed to update memory table", dev->path);
489 }
490 
491 static int
492 virtio_user_dev_setup(struct virtio_user_dev *dev)
493 {
494 	if (dev->is_server) {
495 		if (dev->backend_type != VIRTIO_USER_BACKEND_VHOST_USER) {
496 			PMD_DRV_LOG(ERR, "Server mode only supports vhost-user!");
497 			return -1;
498 		}
499 	}
500 
501 	switch (dev->backend_type) {
502 	case VIRTIO_USER_BACKEND_VHOST_USER:
503 		dev->ops = &virtio_ops_user;
504 		break;
505 	case VIRTIO_USER_BACKEND_VHOST_KERNEL:
506 		dev->ops = &virtio_ops_kernel;
507 		break;
508 	case VIRTIO_USER_BACKEND_VHOST_VDPA:
509 		dev->ops = &virtio_ops_vdpa;
510 		break;
511 	default:
512 		PMD_DRV_LOG(ERR, "(%s) Unknown backend type", dev->path);
513 		return -1;
514 	}
515 
516 	if (dev->ops->setup(dev) < 0) {
517 		PMD_INIT_LOG(ERR, "(%s) Failed to setup backend", dev->path);
518 		return -1;
519 	}
520 
521 	if (virtio_user_dev_init_notify(dev) < 0) {
522 		PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path);
523 		goto destroy;
524 	}
525 
526 	if (virtio_user_fill_intr_handle(dev) < 0) {
527 		PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path);
528 		goto uninit;
529 	}
530 
531 	return 0;
532 
533 uninit:
534 	virtio_user_dev_uninit_notify(dev);
535 destroy:
536 	dev->ops->destroy(dev);
537 
538 	return -1;
539 }
540 
541 /* Use the macro below to filter the features offered by the vhost backend */
542 #define VIRTIO_USER_SUPPORTED_FEATURES			\
543 	(1ULL << VIRTIO_NET_F_MAC		|	\
544 	 1ULL << VIRTIO_NET_F_STATUS		|	\
545 	 1ULL << VIRTIO_NET_F_MQ		|	\
546 	 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR	|	\
547 	 1ULL << VIRTIO_NET_F_CTRL_VQ		|	\
548 	 1ULL << VIRTIO_NET_F_CTRL_RX		|	\
549 	 1ULL << VIRTIO_NET_F_CTRL_VLAN		|	\
550 	 1ULL << VIRTIO_NET_F_CSUM		|	\
551 	 1ULL << VIRTIO_NET_F_HOST_TSO4		|	\
552 	 1ULL << VIRTIO_NET_F_HOST_TSO6		|	\
553 	 1ULL << VIRTIO_NET_F_MRG_RXBUF		|	\
554 	 1ULL << VIRTIO_RING_F_INDIRECT_DESC	|	\
555 	 1ULL << VIRTIO_NET_F_GUEST_CSUM	|	\
556 	 1ULL << VIRTIO_NET_F_GUEST_TSO4	|	\
557 	 1ULL << VIRTIO_NET_F_GUEST_TSO6	|	\
558 	 1ULL << VIRTIO_F_IN_ORDER		|	\
559 	 1ULL << VIRTIO_F_VERSION_1		|	\
560 	 1ULL << VIRTIO_F_RING_PACKED)
561 
562 int
563 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
564 		     int cq, int queue_size, const char *mac, char **ifname,
565 		     int server, int mrg_rxbuf, int in_order, int packed_vq,
566 		     enum virtio_user_backend_type backend_type)
567 {
568 	uint64_t backend_features;
569 	int i;
570 
571 	pthread_mutex_init(&dev->mutex, NULL);
572 	strlcpy(dev->path, path, PATH_MAX);
573 
574 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
575 		dev->kickfds[i] = -1;
576 		dev->callfds[i] = -1;
577 	}
578 
579 	dev->started = 0;
580 	dev->max_queue_pairs = queues;
581 	dev->queue_pairs = 1; /* mq disabled by default */
582 	dev->queue_size = queue_size;
583 	dev->is_server = server;
584 	dev->mac_specified = 0;
585 	dev->frontend_features = 0;
586 	dev->unsupported_features = 0;
587 	dev->backend_type = backend_type;
588 
589 	if (*ifname) {
590 		dev->ifname = *ifname;
591 		*ifname = NULL;
592 	}
593 
594 	if (virtio_user_dev_setup(dev) < 0) {
595 		PMD_INIT_LOG(ERR, "(%s) Failed to set up backend", dev->path);
596 		return -1;
597 	}
598 
599 	if (dev->ops->set_owner(dev) < 0) {
600 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend owner", dev->path);
601 		return -1;
602 	}
603 
604 	if (dev->ops->get_backend_features(&backend_features) < 0) {
605 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend features", dev->path);
606 		return -1;
607 	}
608 
609 	dev->unsupported_features = ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features);
610 
611 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
612 		PMD_INIT_LOG(ERR, "(%s) Failed to get device features", dev->path);
613 		return -1;
614 	}
615 
616 	virtio_user_dev_init_mac(dev, mac);
617 
618 	if (!mrg_rxbuf)
619 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
620 
621 	if (!in_order)
622 		dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
623 
624 	if (!packed_vq)
625 		dev->unsupported_features |= (1ull << VIRTIO_F_RING_PACKED);
626 
627 	if (dev->mac_specified)
628 		dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC);
629 	else
630 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
631 
632 	if (cq) {
633 		/* The device does not really need to know anything about the CQ,
634 		 * so if necessary, we just claim to support it.
635 		 */
636 		dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
637 	} else {
638 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
639 		/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
640 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
641 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
642 		dev->unsupported_features |=
643 			(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
644 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
645 		dev->unsupported_features |=
646 			(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
647 	}
648 
649 	/* The backend will not report this feature, so we add it explicitly */
650 	if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER)
651 		dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
652 
653 	dev->frontend_features &= ~dev->unsupported_features;
654 	dev->device_features &= ~dev->unsupported_features;
655 
656 	if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
657 				virtio_user_mem_event_cb, dev)) {
658 		if (rte_errno != ENOTSUP) {
659 			PMD_INIT_LOG(ERR, "(%s) Failed to register mem event callback",
660 					dev->path);
661 			return -1;
662 		}
663 	}
664 
665 	return 0;
666 }
667 
668 void
669 virtio_user_dev_uninit(struct virtio_user_dev *dev)
670 {
671 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
672 
673 	rte_intr_instance_free(eth_dev->intr_handle);
674 	eth_dev->intr_handle = NULL;
675 
676 	virtio_user_stop_device(dev);
677 
678 	rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);
679 
680 	virtio_user_dev_uninit_notify(dev);
681 
682 	free(dev->ifname);
683 
684 	if (dev->is_server)
685 		unlink(dev->path);
686 
687 	dev->ops->destroy(dev);
688 }
689 
690 uint8_t
691 virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
692 {
693 	uint16_t i;
694 	uint8_t ret = 0;
695 
696 	if (q_pairs > dev->max_queue_pairs) {
697 		PMD_INIT_LOG(ERR, "(%s) multi-q config %u, but only %u supported",
698 			     dev->path, q_pairs, dev->max_queue_pairs);
699 		return -1;
700 	}
701 
702 	for (i = 0; i < q_pairs; ++i)
703 		ret |= dev->ops->enable_qp(dev, i, 1);
704 	for (i = q_pairs; i < dev->max_queue_pairs; ++i)
705 		ret |= dev->ops->enable_qp(dev, i, 0);
706 
707 	dev->queue_pairs = q_pairs;
708 
709 	return ret;
710 }
711 
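/*
 * The vhost backend never sees the control queue (VIRTIO_NET_F_CTRL_VQ is
 * stripped before the features are set), so control requests are consumed
 * and acknowledged here, by virtio-user itself.
 */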
712 static uint32_t
713 virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
714 			    uint16_t idx_hdr)
715 {
716 	struct virtio_net_ctrl_hdr *hdr;
717 	virtio_net_ctrl_ack status = ~0;
718 	uint16_t i, idx_data, idx_status;
719 	uint32_t n_descs = 0;
720 
721 	/* locate desc for header, data, and status */
722 	idx_data = vring->desc[idx_hdr].next;
723 	n_descs++;
724 
725 	i = idx_data;
726 	while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
727 		i = vring->desc[i].next;
728 		n_descs++;
729 	}
730 
731 	/* locate desc for status */
732 	idx_status = i;
733 	n_descs++;
734 
735 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
736 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
737 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
738 		uint16_t queues;
739 
740 		queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
741 		status = virtio_user_handle_mq(dev, queues);
742 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
743 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
744 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
745 		status = 0;
746 	}
747 
748 	/* Update status */
749 	*(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
750 
751 	return n_descs;
752 }
753 
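/*
 * A packed ring descriptor is available when its AVAIL flag matches the
 * expected wrap counter and its USED flag does not.
 */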
754 static inline int
755 desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
756 {
757 	uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
758 
759 	return wrap_counter == !!(flags & VRING_PACKED_DESC_F_AVAIL) &&
760 		wrap_counter != !!(flags & VRING_PACKED_DESC_F_USED);
761 }
762 
763 static uint32_t
764 virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev,
765 				   struct vring_packed *vring,
766 				   uint16_t idx_hdr)
767 {
768 	struct virtio_net_ctrl_hdr *hdr;
769 	virtio_net_ctrl_ack status = ~0;
770 	uint16_t idx_data, idx_status;
771 	/* initialize to one, header is first */
772 	uint32_t n_descs = 1;
773 
774 	/* locate desc for header, data, and status */
775 	idx_data = idx_hdr + 1;
776 	if (idx_data >= dev->queue_size)
777 		idx_data -= dev->queue_size;
778 
779 	n_descs++;
780 
781 	idx_status = idx_data;
782 	while (vring->desc[idx_status].flags & VRING_DESC_F_NEXT) {
783 		idx_status++;
784 		if (idx_status >= dev->queue_size)
785 			idx_status -= dev->queue_size;
786 		n_descs++;
787 	}
788 
789 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
790 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
791 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
792 		uint16_t queues;
793 
794 		queues = *(uint16_t *)(uintptr_t)
795 				vring->desc[idx_data].addr;
796 		status = virtio_user_handle_mq(dev, queues);
797 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
798 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
799 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
800 		status = 0;
801 	}
802 
803 	/* Update status */
804 	*(virtio_net_ctrl_ack *)(uintptr_t)
805 		vring->desc[idx_status].addr = status;
806 
807 	/* Update used descriptor */
808 	vring->desc[idx_hdr].id = vring->desc[idx_status].id;
809 	vring->desc[idx_hdr].len = sizeof(status);
810 
811 	return n_descs;
812 }
813 
814 void
815 virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx)
816 {
817 	struct virtio_user_queue *vq = &dev->packed_queues[queue_idx];
818 	struct vring_packed *vring = &dev->packed_vrings[queue_idx];
819 	uint16_t n_descs, flags;
820 
821 	/* Perform a load-acquire barrier in desc_is_avail to
822 	 * enforce the ordering between desc flags and desc
823 	 * content.
824 	 */
825 	while (desc_is_avail(&vring->desc[vq->used_idx],
826 			     vq->used_wrap_counter)) {
827 
828 		n_descs = virtio_user_handle_ctrl_msg_packed(dev, vring,
829 				vq->used_idx);
830 
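		/* Write the descriptor back as used: both AVAIL and USED must
		 * match the current wrap counter.
		 */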
831 		flags = VRING_DESC_F_WRITE;
832 		if (vq->used_wrap_counter)
833 			flags |= VRING_PACKED_DESC_F_AVAIL_USED;
834 
835 		__atomic_store_n(&vring->desc[vq->used_idx].flags, flags,
836 				 __ATOMIC_RELEASE);
837 
838 		vq->used_idx += n_descs;
839 		if (vq->used_idx >= dev->queue_size) {
840 			vq->used_idx -= dev->queue_size;
841 			vq->used_wrap_counter ^= 1;
842 		}
843 	}
844 }
845 
846 void
847 virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
848 {
849 	uint16_t avail_idx, desc_idx;
850 	struct vring_used_elem *uep;
851 	uint32_t n_descs;
852 	struct vring *vring = &dev->vrings[queue_idx];
853 
854 	/* Consume the avail ring, using the used ring index as the next entry to consume */
855 	while (__atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
856 	       != vring->avail->idx) {
857 		avail_idx = __atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
858 			    & (vring->num - 1);
859 		desc_idx = vring->avail->ring[avail_idx];
860 
861 		n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
862 
863 		/* Update used ring */
864 		uep = &vring->used->ring[avail_idx];
865 		uep->id = desc_idx;
866 		uep->len = n_descs;
867 
868 		__atomic_add_fetch(&vring->used->idx, 1, __ATOMIC_RELAXED);
869 	}
870 }
871 
872 int
873 virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status)
874 {
875 	int ret;
876 
877 	pthread_mutex_lock(&dev->mutex);
878 	dev->status = status;
879 	ret = dev->ops->set_status(dev, status);
880 	if (ret && ret != -ENOTSUP)
881 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend status", dev->path);
882 
883 	pthread_mutex_unlock(&dev->mutex);
884 	return ret;
885 }
886 
887 int
888 virtio_user_dev_update_status(struct virtio_user_dev *dev)
889 {
890 	int ret;
891 	uint8_t status;
892 
893 	pthread_mutex_lock(&dev->mutex);
894 
895 	ret = dev->ops->get_status(dev, &status);
896 	if (!ret) {
897 		dev->status = status;
898 		PMD_INIT_LOG(DEBUG, "Updated Device Status (0x%08x):\n"
899 			"\t-RESET: %u\n"
900 			"\t-ACKNOWLEDGE: %u\n"
901 			"\t-DRIVER: %u\n"
902 			"\t-DRIVER_OK: %u\n"
903 			"\t-FEATURES_OK: %u\n"
904 			"\t-DEVICE_NEED_RESET: %u\n"
905 			"\t-FAILED: %u",
906 			dev->status,
907 			(dev->status == VIRTIO_CONFIG_STATUS_RESET),
908 			!!(dev->status & VIRTIO_CONFIG_STATUS_ACK),
909 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER),
910 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK),
911 			!!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK),
912 			!!(dev->status & VIRTIO_CONFIG_STATUS_DEV_NEED_RESET),
913 			!!(dev->status & VIRTIO_CONFIG_STATUS_FAILED));
914 	} else if (ret != -ENOTSUP) {
915 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend status", dev->path);
916 	}
917 
918 	pthread_mutex_unlock(&dev->mutex);
919 	return ret;
920 }
921 
922 int
923 virtio_user_dev_update_link_state(struct virtio_user_dev *dev)
924 {
925 	if (dev->ops->update_link_state)
926 		return dev->ops->update_link_state(dev);
927 
928 	return 0;
929 }
930 
931 static void
932 virtio_user_dev_reset_queues_packed(struct rte_eth_dev *eth_dev)
933 {
934 	struct virtio_user_dev *dev = eth_dev->data->dev_private;
935 	struct virtio_hw *hw = &dev->hw;
936 	struct virtnet_rx *rxvq;
937 	struct virtnet_tx *txvq;
938 	uint16_t i;
939 
940 	/* Take the lock to avoid contention with the datapath. */
941 	rte_spinlock_lock(&hw->state_lock);
942 	hw->started = 0;
943 
944 	/*
945 	 * Wait for the datapath to complete before resetting the queues.
946 	 * 1 ms should be enough for the ongoing Tx/Rx functions to finish.
947 	 */
948 	rte_delay_ms(1);
949 
950 	/* Vring reset for each Tx queue and Rx queue. */
951 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
952 		rxvq = eth_dev->data->rx_queues[i];
953 		virtqueue_rxvq_reset_packed(virtnet_rxq_to_vq(rxvq));
954 		virtio_dev_rx_queue_setup_finish(eth_dev, i);
955 	}
956 
957 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
958 		txvq = eth_dev->data->tx_queues[i];
959 		virtqueue_txvq_reset_packed(virtnet_txq_to_vq(txvq));
960 	}
961 
962 	hw->started = 1;
963 	rte_spinlock_unlock(&hw->state_lock);
964 }
965 
966 void
967 virtio_user_dev_delayed_disconnect_handler(void *param)
968 {
969 	struct virtio_user_dev *dev = param;
970 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
971 
972 	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
973 		PMD_DRV_LOG(ERR, "interrupt disable failed");
974 		return;
975 	}
976 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
977 		    rte_intr_fd_get(eth_dev->intr_handle));
978 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
979 					 virtio_interrupt_handler,
980 					 eth_dev) != 1)
981 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
982 
983 	if (dev->is_server) {
984 		if (dev->ops->server_disconnect)
985 			dev->ops->server_disconnect(dev);
986 
987 		rte_intr_fd_set(eth_dev->intr_handle,
988 			dev->ops->get_intr_fd(dev));
989 
990 		PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
991 			    rte_intr_fd_get(eth_dev->intr_handle));
992 
993 		if (rte_intr_callback_register(eth_dev->intr_handle,
994 					       virtio_interrupt_handler,
995 					       eth_dev))
996 			PMD_DRV_LOG(ERR, "interrupt register failed");
997 
998 		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
999 			PMD_DRV_LOG(ERR, "interrupt enable failed");
1000 			return;
1001 		}
1002 	}
1003 }
1004 
1005 static void
1006 virtio_user_dev_delayed_intr_reconfig_handler(void *param)
1007 {
1008 	struct virtio_user_dev *dev = param;
1009 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1010 
1011 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
1012 		    rte_intr_fd_get(eth_dev->intr_handle));
1013 
1014 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
1015 					 virtio_interrupt_handler,
1016 					 eth_dev) != 1)
1017 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
1018 
1019 	rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev));
1020 
1021 	PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
1022 		    rte_intr_fd_get(eth_dev->intr_handle));
1023 
1024 	if (rte_intr_callback_register(eth_dev->intr_handle,
1025 				       virtio_interrupt_handler, eth_dev))
1026 		PMD_DRV_LOG(ERR, "interrupt register failed");
1027 
1028 	if (rte_intr_enable(eth_dev->intr_handle) < 0)
1029 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1030 }
1031 
1032 int
1033 virtio_user_dev_server_reconnect(struct virtio_user_dev *dev)
1034 {
1035 	int ret, old_status;
1036 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1037 	struct virtio_hw *hw = &dev->hw;
1038 
1039 	if (!dev->ops->server_reconnect) {
1040 		PMD_DRV_LOG(ERR, "(%s) Missing server reconnect callback", dev->path);
1041 		return -1;
1042 	}
1043 
1044 	if (dev->ops->server_reconnect(dev)) {
1045 		PMD_DRV_LOG(ERR, "(%s) Reconnect callback call failed", dev->path);
1046 		return -1;
1047 	}
1048 
1049 	old_status = dev->status;
1050 
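	/* Replay the virtio initialization sequence:
	 * RESET -> ACK -> DRIVER -> FEATURES_OK -> DRIVER_OK.
	 */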
1051 	virtio_reset(hw);
1052 
1053 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1054 
1055 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1056 
1057 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
1058 		PMD_INIT_LOG(ERR, "get_features failed: %s",
1059 			     strerror(errno));
1060 		return -1;
1061 	}
1062 
1063 	/* Mask out the features flagged as unsupported */
1064 	dev->device_features &= ~(dev->unsupported_features);
1065 
1066 	dev->features &= (dev->device_features | dev->frontend_features);
1067 
1068 	/* For packed ring, the queues must be reset on reconnection. */
1069 	if (virtio_with_packed_queue(hw) &&
1070 	   (old_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) {
1071 		PMD_INIT_LOG(NOTICE, "In-flight packets will be dropped"
1072 				" when reconnecting with packed ring.");
1073 		virtio_user_dev_reset_queues_packed(eth_dev);
1074 	}
1075 
1076 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1077 
1078 	/* Start the device */
1079 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
1080 	if (!dev->started)
1081 		return -1;
1082 
1083 	if (dev->queue_pairs > 1) {
1084 		ret = virtio_user_handle_mq(dev, dev->queue_pairs);
1085 		if (ret != 0) {
1086 			PMD_INIT_LOG(ERR, "Failed to enable multi-queue pairs!");
1087 			return -1;
1088 		}
1089 	}
1090 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1091 		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
1092 			PMD_DRV_LOG(ERR, "interrupt disable failed");
1093 			return -1;
1094 		}
1095 		/*
1096 		 * This function can be called from the interrupt handler, so
1097 		 * we can't unregister the interrupt handler here. Set an
1098 		 * alarm to do it later.
1099 		 */
1100 		rte_eal_alarm_set(1,
1101 			virtio_user_dev_delayed_intr_reconfig_handler,
1102 			(void *)dev);
1103 	}
1104 	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeded!");
1105 	return 0;
1106 }
1107