xref: /dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c (revision 8f1d23ece06adff5eae9f1b4365bdbbd3abee2b2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <fcntl.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <sys/mman.h>
12 #include <unistd.h>
13 #include <sys/eventfd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 
17 #include <rte_alarm.h>
18 #include <rte_string_fns.h>
19 #include <rte_eal_memconfig.h>
20 
21 #include "vhost.h"
22 #include "virtio_user_dev.h"
23 #include "../virtio_ethdev.h"
24 
25 #define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"
26 
27 const char * const virtio_user_backend_strings[] = {
28 	[VIRTIO_USER_BACKEND_UNKNOWN] = "VIRTIO_USER_BACKEND_UNKNOWN",
29 	[VIRTIO_USER_BACKEND_VHOST_USER] = "VHOST_USER",
30 	[VIRTIO_USER_BACKEND_VHOST_KERNEL] = "VHOST_NET",
31 	[VIRTIO_USER_BACKEND_VHOST_VDPA] = "VHOST_VDPA",
32 };
33 
34 static int
35 virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
36 {
37 	/* Of all the per-virtqueue messages, make sure VHOST_SET_VRING_CALL is
38 	 * sent first, because vhost depends on this message to allocate the
39 	 * virtqueue pair.
40 	 */
41 	struct vhost_vring_file file;
42 	int ret;
43 
44 	file.index = queue_sel;
45 	file.fd = dev->callfds[queue_sel];
46 	ret = dev->ops->set_vring_call(dev, &file);
47 	if (ret < 0) {
48 		PMD_INIT_LOG(ERR, "(%s) Failed to create queue %u", dev->path, queue_sel);
49 		return -1;
50 	}
51 
52 	return 0;
53 }
54 
55 static int
56 virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
57 {
58 	int ret;
59 	struct vhost_vring_file file;
60 	struct vhost_vring_state state;
61 	struct vring *vring = &dev->vrings[queue_sel];
62 	struct vring_packed *pq_vring = &dev->packed_vrings[queue_sel];
63 	struct vhost_vring_addr addr = {
64 		.index = queue_sel,
65 		.log_guest_addr = 0,
66 		.flags = 0, /* disable log */
67 	};
68 
69 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
70 		addr.desc_user_addr =
71 			(uint64_t)(uintptr_t)pq_vring->desc;
72 		addr.avail_user_addr =
73 			(uint64_t)(uintptr_t)pq_vring->driver;
74 		addr.used_user_addr =
75 			(uint64_t)(uintptr_t)pq_vring->device;
76 	} else {
77 		addr.desc_user_addr = (uint64_t)(uintptr_t)vring->desc;
78 		addr.avail_user_addr = (uint64_t)(uintptr_t)vring->avail;
79 		addr.used_user_addr = (uint64_t)(uintptr_t)vring->used;
80 	}
81 
82 	state.index = queue_sel;
83 	state.num = vring->num;
84 	ret = dev->ops->set_vring_num(dev, &state);
85 	if (ret < 0)
86 		goto err;
87 
88 	state.index = queue_sel;
89 	state.num = 0; /* no reservation */
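	/* For packed virtqueues, the vhost vring base value encodes the ring
	 * state: bits 0..14 carry the last available index and bit 15 the
	 * available wrap counter (the upper bits carry the used index and wrap
	 * counter on GET_VRING_BASE). Setting bit 15 below therefore means
	 * "index 0, wrap counter 1", the initial state of a packed ring.
	 */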
90 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
91 		state.num |= (1 << 15);
92 	ret = dev->ops->set_vring_base(dev, &state);
93 	if (ret < 0)
94 		goto err;
95 
96 	ret = dev->ops->set_vring_addr(dev, &addr);
97 	if (ret < 0)
98 		goto err;
99 
100 	/* Of all the per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK
101 	 * is sent last, because vhost depends on this message to judge whether
102 	 * virtio is ready.
103 	 */
104 	file.index = queue_sel;
105 	file.fd = dev->kickfds[queue_sel];
106 	ret = dev->ops->set_vring_kick(dev, &file);
107 	if (ret < 0)
108 		goto err;
109 
110 	return 0;
111 err:
112 	PMD_INIT_LOG(ERR, "(%s) Failed to kick queue %u", dev->path, queue_sel);
113 
114 	return -1;
115 }
116 
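/* Apply 'fn' to every virtqueue, Rx queues first and then Tx queues.
 * Virtqueue indices follow the usual virtio-net layout (VTNET_SQ_RQ_QUEUE_IDX
 * and VTNET_SQ_TQ_QUEUE_IDX are expected to be 0 and 1): queue pair i maps to
 * Rx virtqueue 2*i and Tx virtqueue 2*i + 1, so with max_queue_pairs == 2 the
 * loops below visit virtqueues 0, 2 and then 1, 3.
 */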
117 static int
118 virtio_user_queue_setup(struct virtio_user_dev *dev,
119 			int (*fn)(struct virtio_user_dev *, uint32_t))
120 {
121 	uint32_t i, queue_sel;
122 
123 	for (i = 0; i < dev->max_queue_pairs; ++i) {
124 		queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
125 		if (fn(dev, queue_sel) < 0) {
126 			PMD_DRV_LOG(ERR, "(%s) setup rx vq %u failed", dev->path, i);
127 			return -1;
128 		}
129 	}
130 	for (i = 0; i < dev->max_queue_pairs; ++i) {
131 		queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
132 		if (fn(dev, queue_sel) < 0) {
133 			PMD_DRV_LOG(ERR, "(%s) setup tx vq %u failed", dev->path, i);
134 			return -1;
135 		}
136 	}
137 
138 	return 0;
139 }
140 
141 int
142 virtio_user_dev_set_features(struct virtio_user_dev *dev)
143 {
144 	uint64_t features;
145 	int ret = -1;
146 
147 	pthread_mutex_lock(&dev->mutex);
148 
149 	/* Step 0: tell vhost to create queues */
150 	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
151 		goto error;
152 
153 	features = dev->features;
154 
155 	/* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
156 	features &= ~(1ull << VIRTIO_NET_F_MAC);
157 	/* Strip VIRTIO_NET_F_CTRL_VQ, as the device does not really need to know about it */
158 	features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
159 	features &= ~(1ull << VIRTIO_NET_F_STATUS);
160 	ret = dev->ops->set_features(dev, features);
161 	if (ret < 0)
162 		goto error;
163 	PMD_DRV_LOG(INFO, "(%s) set features: 0x%" PRIx64, dev->path, features);
164 error:
165 	pthread_mutex_unlock(&dev->mutex);
166 
167 	return ret;
168 }
169 
170 int
171 virtio_user_start_device(struct virtio_user_dev *dev)
172 {
173 	int ret;
174 
175 	/*
176 	 * XXX workaround!
177 	 *
178 	 * We need to make sure that the locks will be
179 	 * taken in the correct order to avoid deadlocks.
180 	 *
181 	 * Before releasing this lock, this thread should
182 	 * not trigger any memory hotplug events.
183 	 *
184 	 * This is a temporary workaround, and should be
185 	 * replaced when we get proper support from the
186 	 * memory subsystem in the future.
187 	 */
188 	rte_mcfg_mem_read_lock();
189 	pthread_mutex_lock(&dev->mutex);
190 
191 	/* Step 2: share memory regions */
192 	ret = dev->ops->set_memory_table(dev);
193 	if (ret < 0)
194 		goto error;
195 
196 	/* Step 3: kick queues */
197 	ret = virtio_user_queue_setup(dev, virtio_user_kick_queue);
198 	if (ret < 0)
199 		goto error;
200 
201 	/* Step 4: enable queues.
202 	 * We enable the first queue pair by default.
203 	 */
204 	ret = dev->ops->enable_qp(dev, 0, 1);
205 	if (ret < 0)
206 		goto error;
207 
208 	dev->started = true;
209 
210 	pthread_mutex_unlock(&dev->mutex);
211 	rte_mcfg_mem_read_unlock();
212 
213 	return 0;
214 error:
215 	pthread_mutex_unlock(&dev->mutex);
216 	rte_mcfg_mem_read_unlock();
217 
218 	PMD_INIT_LOG(ERR, "(%s) Failed to start device", dev->path);
219 
220 	/* TODO: free resources here, or let the caller check and clean up */
221 	return -1;
222 }
223 
224 int virtio_user_stop_device(struct virtio_user_dev *dev)
225 {
226 	struct vhost_vring_state state;
227 	uint32_t i;
228 	int ret;
229 
230 	pthread_mutex_lock(&dev->mutex);
231 	if (!dev->started)
232 		goto out;
233 
234 	for (i = 0; i < dev->max_queue_pairs; ++i) {
235 		ret = dev->ops->enable_qp(dev, i, 0);
236 		if (ret < 0)
237 			goto err;
238 	}
239 
240 	/* Stop the backend. */
241 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
242 		state.index = i;
243 		ret = dev->ops->get_vring_base(dev, &state);
244 		if (ret < 0) {
245 			PMD_DRV_LOG(ERR, "(%s) get_vring_base failed, index=%u", dev->path, i);
246 			goto err;
247 		}
248 	}
249 
250 	dev->started = false;
251 
252 out:
253 	pthread_mutex_unlock(&dev->mutex);
254 
255 	return 0;
256 err:
257 	pthread_mutex_unlock(&dev->mutex);
258 
259 	PMD_INIT_LOG(ERR, "(%s) Failed to stop device", dev->path);
260 
261 	return -1;
262 }
263 
264 int
265 virtio_user_dev_set_mac(struct virtio_user_dev *dev)
266 {
267 	int ret = 0;
268 
269 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
270 		return -ENOTSUP;
271 
272 	if (!dev->ops->set_config)
273 		return -ENOTSUP;
274 
275 	ret = dev->ops->set_config(dev, dev->mac_addr,
276 			offsetof(struct virtio_net_config, mac),
277 			RTE_ETHER_ADDR_LEN);
278 	if (ret)
279 		PMD_DRV_LOG(ERR, "(%s) Failed to set MAC address in device", dev->path);
280 
281 	return ret;
282 }
283 
284 int
285 virtio_user_dev_get_mac(struct virtio_user_dev *dev)
286 {
287 	int ret = 0;
288 
289 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
290 		return -ENOTSUP;
291 
292 	if (!dev->ops->get_config)
293 		return -ENOTSUP;
294 
295 	ret = dev->ops->get_config(dev, dev->mac_addr,
296 			offsetof(struct virtio_net_config, mac),
297 			RTE_ETHER_ADDR_LEN);
298 	if (ret)
299 		PMD_DRV_LOG(ERR, "(%s) Failed to get MAC address from device", dev->path);
300 
301 	return ret;
302 }
303 
304 static void
305 virtio_user_dev_init_mac(struct virtio_user_dev *dev, const char *mac)
306 {
307 	struct rte_ether_addr cmdline_mac;
308 	char buf[RTE_ETHER_ADDR_FMT_SIZE];
309 	int ret;
310 
311 	if (mac && rte_ether_unformat_addr(mac, &cmdline_mac) == 0) {
312 		/*
313 		 * The MAC address was passed on the command line; try to store
314 		 * it in the device if the device supports it. Otherwise, fall
315 		 * back to the device's own MAC address.
316 		 */
317 		memcpy(dev->mac_addr, &cmdline_mac, RTE_ETHER_ADDR_LEN);
318 		dev->mac_specified = 1;
319 
320 		/* Setting the MAC may fail; in that case, continue and read the device's MAC */
321 		virtio_user_dev_set_mac(dev);
322 		ret = virtio_user_dev_get_mac(dev);
323 		if (ret == -ENOTSUP)
324 			goto out;
325 
326 		if (memcmp(&cmdline_mac, dev->mac_addr, RTE_ETHER_ADDR_LEN))
327 			PMD_DRV_LOG(INFO, "(%s) Device MAC update failed", dev->path);
328 	} else {
329 		ret = virtio_user_dev_get_mac(dev);
330 		if (ret) {
331 			PMD_DRV_LOG(ERR, "(%s) No valid MAC in devargs or device, using random",
332 					dev->path);
333 			return;
334 		}
335 
336 		dev->mac_specified = 1;
337 	}
338 out:
339 	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE,
340 			(struct rte_ether_addr *)dev->mac_addr);
341 	PMD_DRV_LOG(INFO, "(%s) MAC %s specified", dev->path, buf);
342 }
343 
344 static int
345 virtio_user_dev_init_notify(struct virtio_user_dev *dev)
346 {
347 	uint32_t i, j;
348 	int callfd;
349 	int kickfd;
350 
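	/* One pair of eventfds per virtqueue: the callfd is handed to the
	 * backend so it can signal used buffers (interrupts) back to us, and
	 * the kickfd is what we write to in order to notify the backend of
	 * new available buffers.
	 */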
351 	for (i = 0; i < dev->max_queue_pairs * 2; i++) {
352 		/* We could pass an invalid fd here, but some backends use the
353 		 * kickfd and callfd as criteria to judge whether the device is
354 		 * alive, so we use real eventfds.
355 		 */
356 		callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
357 		if (callfd < 0) {
358 			PMD_DRV_LOG(ERR, "(%s) callfd error, %s", dev->path, strerror(errno));
359 			goto err;
360 		}
361 		kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
362 		if (kickfd < 0) {
363 			close(callfd);
364 			PMD_DRV_LOG(ERR, "(%s) kickfd error, %s", dev->path, strerror(errno));
365 			goto err;
366 		}
367 		dev->callfds[i] = callfd;
368 		dev->kickfds[i] = kickfd;
369 	}
370 
371 	return 0;
372 err:
373 	for (j = 0; j < i; j++) {
374 		if (dev->kickfds[j] >= 0) {
375 			close(dev->kickfds[j]);
376 			dev->kickfds[j] = -1;
377 		}
378 		if (dev->callfds[j] >= 0) {
379 			close(dev->callfds[j]);
380 			dev->callfds[j] = -1;
381 		}
382 	}
383 
384 	return -1;
385 }
386 
387 static void
388 virtio_user_dev_uninit_notify(struct virtio_user_dev *dev)
389 {
390 	uint32_t i;
391 
392 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
393 		if (dev->kickfds[i] >= 0) {
394 			close(dev->kickfds[i]);
395 			dev->kickfds[i] = -1;
396 		}
397 		if (dev->callfds[i] >= 0) {
398 			close(dev->callfds[i]);
399 			dev->callfds[i] = -1;
400 		}
401 	}
402 }
403 
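/* Populate the ethdev interrupt handle so per-queue Rx interrupts and the
 * device-level interrupt can be serviced: each queue pair's Rx callfd becomes
 * one entry in the efd array, and the backend-specific fd returned by
 * get_intr_fd() is used as the main interrupt fd. max_intr is sized as
 * max_queue_pairs + 1, leaving room for that extra device-level fd.
 */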
404 static int
405 virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
406 {
407 	uint32_t i;
408 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
409 
410 	if (eth_dev->intr_handle == NULL) {
411 		eth_dev->intr_handle =
412 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
413 		if (eth_dev->intr_handle == NULL) {
414 			PMD_DRV_LOG(ERR, "(%s) failed to allocate intr_handle", dev->path);
415 			return -1;
416 		}
417 	}
418 
419 	for (i = 0; i < dev->max_queue_pairs; ++i) {
420 		if (rte_intr_efds_index_set(eth_dev->intr_handle, i,
421 				dev->callfds[2 * i + VTNET_SQ_RQ_QUEUE_IDX]))
422 			return -rte_errno;
423 	}
424 
425 	if (rte_intr_nb_efd_set(eth_dev->intr_handle, dev->max_queue_pairs))
426 		return -rte_errno;
427 
428 	if (rte_intr_max_intr_set(eth_dev->intr_handle,
429 			dev->max_queue_pairs + 1))
430 		return -rte_errno;
431 
432 	if (rte_intr_type_set(eth_dev->intr_handle, RTE_INTR_HANDLE_VDEV))
433 		return -rte_errno;
434 
435 	/* For a virtio vdev, no counter read is needed to clear the event */
436 	if (rte_intr_efd_counter_size_set(eth_dev->intr_handle, 0))
437 		return -rte_errno;
438 
439 	if (rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev)))
440 		return -rte_errno;
441 
442 	return 0;
443 }
444 
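/* Memory hotplug callback: when DPDK allocates or frees hugepage memory, the
 * backend's view of the memory regions becomes stale. The handler below pauses
 * the active queue pairs, pushes an updated memory table to the backend, and
 * then resumes the queues, so the backend never accesses memory through an
 * outdated mapping.
 */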
445 static void
446 virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
447 			 const void *addr,
448 			 size_t len __rte_unused,
449 			 void *arg)
450 {
451 	struct virtio_user_dev *dev = arg;
452 	struct rte_memseg_list *msl;
453 	uint16_t i;
454 	int ret = 0;
455 
456 	/* ignore externally allocated memory */
457 	msl = rte_mem_virt2memseg_list(addr);
458 	if (msl->external)
459 		return;
460 
461 	pthread_mutex_lock(&dev->mutex);
462 
463 	if (dev->started == false)
464 		goto exit;
465 
466 	/* Step 1: pause the active queues */
467 	for (i = 0; i < dev->queue_pairs; i++) {
468 		ret = dev->ops->enable_qp(dev, i, 0);
469 		if (ret < 0)
470 			goto exit;
471 	}
472 
473 	/* Step 2: update memory regions */
474 	ret = dev->ops->set_memory_table(dev);
475 	if (ret < 0)
476 		goto exit;
477 
478 	/* Step 3: resume the active queues */
479 	for (i = 0; i < dev->queue_pairs; i++) {
480 		ret = dev->ops->enable_qp(dev, i, 1);
481 		if (ret < 0)
482 			goto exit;
483 	}
484 
485 exit:
486 	pthread_mutex_unlock(&dev->mutex);
487 
488 	if (ret < 0)
489 		PMD_DRV_LOG(ERR, "(%s) Failed to update memory table", dev->path);
490 }
491 
492 static int
493 virtio_user_dev_setup(struct virtio_user_dev *dev)
494 {
495 	if (dev->is_server) {
496 		if (dev->backend_type != VIRTIO_USER_BACKEND_VHOST_USER) {
497 			PMD_DRV_LOG(ERR, "Server mode only supports vhost-user!");
498 			return -1;
499 		}
500 	}
501 
502 	switch (dev->backend_type) {
503 	case VIRTIO_USER_BACKEND_VHOST_USER:
504 		dev->ops = &virtio_ops_user;
505 		break;
506 	case VIRTIO_USER_BACKEND_VHOST_KERNEL:
507 		dev->ops = &virtio_ops_kernel;
508 		break;
509 	case VIRTIO_USER_BACKEND_VHOST_VDPA:
510 		dev->ops = &virtio_ops_vdpa;
511 		break;
512 	default:
513 		PMD_DRV_LOG(ERR, "(%s) Unknown backend type", dev->path);
514 		return -1;
515 	}
516 
517 	if (dev->ops->setup(dev) < 0) {
518 		PMD_INIT_LOG(ERR, "(%s) Failed to setup backend", dev->path);
519 		return -1;
520 	}
521 
522 	if (virtio_user_dev_init_notify(dev) < 0) {
523 		PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path);
524 		goto destroy;
525 	}
526 
527 	if (virtio_user_fill_intr_handle(dev) < 0) {
528 		PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path);
529 		goto uninit;
530 	}
531 
532 	return 0;
533 
534 uninit:
535 	virtio_user_dev_uninit_notify(dev);
536 destroy:
537 	dev->ops->destroy(dev);
538 
539 	return -1;
540 }
541 
542 /* Use the macro below to filter the features offered by the vhost backend */
543 #define VIRTIO_USER_SUPPORTED_FEATURES			\
544 	(1ULL << VIRTIO_NET_F_MAC		|	\
545 	 1ULL << VIRTIO_NET_F_STATUS		|	\
546 	 1ULL << VIRTIO_NET_F_MQ		|	\
547 	 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR	|	\
548 	 1ULL << VIRTIO_NET_F_CTRL_VQ		|	\
549 	 1ULL << VIRTIO_NET_F_CTRL_RX		|	\
550 	 1ULL << VIRTIO_NET_F_CTRL_VLAN		|	\
551 	 1ULL << VIRTIO_NET_F_CSUM		|	\
552 	 1ULL << VIRTIO_NET_F_HOST_TSO4		|	\
553 	 1ULL << VIRTIO_NET_F_HOST_TSO6		|	\
554 	 1ULL << VIRTIO_NET_F_MRG_RXBUF		|	\
555 	 1ULL << VIRTIO_RING_F_INDIRECT_DESC	|	\
556 	 1ULL << VIRTIO_NET_F_GUEST_CSUM	|	\
557 	 1ULL << VIRTIO_NET_F_GUEST_TSO4	|	\
558 	 1ULL << VIRTIO_NET_F_GUEST_TSO6	|	\
559 	 1ULL << VIRTIO_F_IN_ORDER		|	\
560 	 1ULL << VIRTIO_F_VERSION_1		|	\
561 	 1ULL << VIRTIO_F_RING_PACKED)
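/* Any feature bit that is neither in VIRTIO_USER_SUPPORTED_FEATURES nor among
 * the backend-specific bits returned by get_backend_features() is recorded in
 * dev->unsupported_features and later cleared from both device_features and
 * frontend_features. For example, a backend offering VIRTIO_NET_F_GUEST_UFO
 * would still see that bit masked out, since it is not in the list above.
 */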
562 
563 int
564 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
565 		     int cq, int queue_size, const char *mac, char **ifname,
566 		     int server, int mrg_rxbuf, int in_order, int packed_vq,
567 		     enum virtio_user_backend_type backend_type)
568 {
569 	uint64_t backend_features;
570 	int i;
571 
572 	pthread_mutex_init(&dev->mutex, NULL);
573 	strlcpy(dev->path, path, PATH_MAX);
574 
575 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
576 		dev->kickfds[i] = -1;
577 		dev->callfds[i] = -1;
578 	}
579 
580 	dev->started = 0;
581 	dev->max_queue_pairs = queues;
582 	dev->queue_pairs = 1; /* mq disabled by default */
583 	dev->queue_size = queue_size;
584 	dev->is_server = server;
585 	dev->mac_specified = 0;
586 	dev->frontend_features = 0;
587 	dev->unsupported_features = 0;
588 	dev->backend_type = backend_type;
589 
590 	if (*ifname) {
591 		dev->ifname = *ifname;
592 		*ifname = NULL;
593 	}
594 
595 	if (virtio_user_dev_setup(dev) < 0) {
596 		PMD_INIT_LOG(ERR, "(%s) backend setup failed", dev->path);
597 		return -1;
598 	}
599 
600 	if (dev->ops->set_owner(dev) < 0) {
601 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend owner", dev->path);
602 		return -1;
603 	}
604 
605 	if (dev->ops->get_backend_features(&backend_features) < 0) {
606 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend features", dev->path);
607 		return -1;
608 	}
609 
610 	dev->unsupported_features = ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features);
611 
612 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
613 		PMD_INIT_LOG(ERR, "(%s) Failed to get device features", dev->path);
614 		return -1;
615 	}
616 
617 	virtio_user_dev_init_mac(dev, mac);
618 
619 	if (!mrg_rxbuf)
620 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
621 
622 	if (!in_order)
623 		dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
624 
625 	if (!packed_vq)
626 		dev->unsupported_features |= (1ull << VIRTIO_F_RING_PACKED);
627 
628 	if (dev->mac_specified)
629 		dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC);
630 	else
631 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
632 
633 	if (cq) {
634 		/* The device does not really need to know anything about the CQ,
635 		 * so if necessary, we just claim to support the CQ.
636 		 */
637 		dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
638 	} else {
639 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
640 		/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
641 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
642 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
643 		dev->unsupported_features |=
644 			(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
645 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
646 		dev->unsupported_features |=
647 			(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
648 	}
649 
650 	/* The backend will not report this feature, so we add it explicitly */
651 	if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER)
652 		dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
653 
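	/* Finalize the two feature sets: device_features ends up holding what
	 * the backend actually offers (minus anything virtio-user cannot
	 * handle), while frontend_features holds bits emulated by virtio-user
	 * itself (CTRL_VQ, STATUS and MAC, as set above). The virtio-user
	 * ethdev layer typically exposes the union of the two sets to the
	 * upper virtio driver.
	 */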
654 	dev->frontend_features &= ~dev->unsupported_features;
655 	dev->device_features &= ~dev->unsupported_features;
656 
657 	if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
658 				virtio_user_mem_event_cb, dev)) {
659 		if (rte_errno != ENOTSUP) {
660 			PMD_INIT_LOG(ERR, "(%s) Failed to register mem event callback",
661 					dev->path);
662 			return -1;
663 		}
664 	}
665 
666 	return 0;
667 }
668 
669 void
670 virtio_user_dev_uninit(struct virtio_user_dev *dev)
671 {
672 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
673 
674 	rte_intr_instance_free(eth_dev->intr_handle);
675 	eth_dev->intr_handle = NULL;
676 
677 	virtio_user_stop_device(dev);
678 
679 	rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);
680 
681 	virtio_user_dev_uninit_notify(dev);
682 
683 	free(dev->ifname);
684 
685 	if (dev->is_server)
686 		unlink(dev->path);
687 
688 	dev->ops->destroy(dev);
689 }
690 
691 uint8_t
692 virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
693 {
694 	uint16_t i;
695 	uint8_t ret = 0;
696 
697 	if (q_pairs > dev->max_queue_pairs) {
698 		PMD_INIT_LOG(ERR, "(%s) multi-q config %u, but only %u supported",
699 			     dev->path, q_pairs, dev->max_queue_pairs);
700 		return -1;
701 	}
702 
703 	for (i = 0; i < q_pairs; ++i)
704 		ret |= dev->ops->enable_qp(dev, i, 1);
705 	for (i = q_pairs; i < dev->max_queue_pairs; ++i)
706 		ret |= dev->ops->enable_qp(dev, i, 0);
707 
708 	dev->queue_pairs = q_pairs;
709 
710 	return ret;
711 }
712 
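/* Emulate the device side of the control virtqueue for a split ring. A control
 * message is a descriptor chain of at least three parts: a header descriptor
 * (struct virtio_net_ctrl_hdr), one or more data descriptors linked with
 * VRING_DESC_F_NEXT, and a final one-byte status descriptor that the "device"
 * writes the ack into. The function returns how many descriptors the chain
 * consumed.
 */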
713 static uint32_t
714 virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
715 			    uint16_t idx_hdr)
716 {
717 	struct virtio_net_ctrl_hdr *hdr;
718 	virtio_net_ctrl_ack status = ~0;
719 	uint16_t i, idx_data, idx_status;
720 	uint32_t n_descs = 0;
721 
722 	/* locate desc for header, data, and status */
723 	idx_data = vring->desc[idx_hdr].next;
724 	n_descs++;
725 
726 	i = idx_data;
727 	while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
728 		i = vring->desc[i].next;
729 		n_descs++;
730 	}
731 
732 	/* locate desc for status */
733 	idx_status = i;
734 	n_descs++;
735 
736 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
737 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
738 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
739 		uint16_t queues;
740 
741 		queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
742 		status = virtio_user_handle_mq(dev, queues);
743 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
744 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
745 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
746 		status = 0;
747 	}
748 
749 	/* Update status */
750 	*(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
751 
752 	return n_descs;
753 }
754 
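/* A packed-ring descriptor belongs to the device when its AVAIL flag matches
 * the wrap counter the device expects and its USED flag does not. On the first
 * pass (wrap counter == 1) the driver publishes descriptors with AVAIL=1 and
 * USED=0; once the device marks them used it sets both flags to the same
 * value, so the check below fails until the driver wraps around and flips the
 * flags again.
 */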
755 static inline int
756 desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
757 {
758 	uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
759 
760 	return wrap_counter == !!(flags & VRING_PACKED_DESC_F_AVAIL) &&
761 		wrap_counter != !!(flags & VRING_PACKED_DESC_F_USED);
762 }
763 
764 static uint32_t
765 virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev,
766 				   struct vring_packed *vring,
767 				   uint16_t idx_hdr)
768 {
769 	struct virtio_net_ctrl_hdr *hdr;
770 	virtio_net_ctrl_ack status = ~0;
771 	uint16_t idx_data, idx_status;
772 	/* initialize to one, header is first */
773 	uint32_t n_descs = 1;
774 
775 	/* locate desc for header, data, and status */
776 	idx_data = idx_hdr + 1;
777 	if (idx_data >= dev->queue_size)
778 		idx_data -= dev->queue_size;
779 
780 	n_descs++;
781 
782 	idx_status = idx_data;
783 	while (vring->desc[idx_status].flags & VRING_DESC_F_NEXT) {
784 		idx_status++;
785 		if (idx_status >= dev->queue_size)
786 			idx_status -= dev->queue_size;
787 		n_descs++;
788 	}
789 
790 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
791 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
792 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
793 		uint16_t queues;
794 
795 		queues = *(uint16_t *)(uintptr_t)
796 				vring->desc[idx_data].addr;
797 		status = virtio_user_handle_mq(dev, queues);
798 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
799 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
800 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
801 		status = 0;
802 	}
803 
804 	/* Update status */
805 	*(virtio_net_ctrl_ack *)(uintptr_t)
806 		vring->desc[idx_status].addr = status;
807 
808 	/* Update used descriptor */
809 	vring->desc[idx_hdr].id = vring->desc[idx_status].id;
810 	vring->desc[idx_hdr].len = sizeof(status);
811 
812 	return n_descs;
813 }
814 
815 void
816 virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx)
817 {
818 	struct virtio_user_queue *vq = &dev->packed_queues[queue_idx];
819 	struct vring_packed *vring = &dev->packed_vrings[queue_idx];
820 	uint16_t n_descs, flags;
821 
822 	/* Perform a load-acquire barrier in desc_is_avail to
823 	 * enforce the ordering between desc flags and desc
824 	 * content.
825 	 */
826 	while (desc_is_avail(&vring->desc[vq->used_idx],
827 			     vq->used_wrap_counter)) {
828 
829 		n_descs = virtio_user_handle_ctrl_msg_packed(dev, vring,
830 				vq->used_idx);
831 
832 		flags = VRING_DESC_F_WRITE;
833 		if (vq->used_wrap_counter)
834 			flags |= VRING_PACKED_DESC_F_AVAIL_USED;
835 
836 		__atomic_store_n(&vring->desc[vq->used_idx].flags, flags,
837 				 __ATOMIC_RELEASE);
838 
839 		vq->used_idx += n_descs;
840 		if (vq->used_idx >= dev->queue_size) {
841 			vq->used_idx -= dev->queue_size;
842 			vq->used_wrap_counter ^= 1;
843 		}
844 	}
845 }
846 
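/* Split-ring counterpart of virtio_user_handle_cq_packed(): virtio-user itself
 * acts as the device for the control queue, so it walks the avail ring,
 * processes each control command, and completes it by filling a used-ring
 * entry and bumping used->idx.
 */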
847 void
848 virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
849 {
850 	uint16_t avail_idx, desc_idx;
851 	struct vring_used_elem *uep;
852 	uint32_t n_descs;
853 	struct vring *vring = &dev->vrings[queue_idx];
854 
855 	/* Consume the avail ring, using the used ring's idx as the starting index */
856 	while (__atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
857 	       != vring->avail->idx) {
858 		avail_idx = __atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
859 			    & (vring->num - 1);
860 		desc_idx = vring->avail->ring[avail_idx];
861 
862 		n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
863 
864 		/* Update used ring */
865 		uep = &vring->used->ring[avail_idx];
866 		uep->id = desc_idx;
867 		uep->len = n_descs;
868 
869 		__atomic_add_fetch(&vring->used->idx, 1, __ATOMIC_RELAXED);
870 	}
871 }
872 
873 int
874 virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status)
875 {
876 	int ret;
877 
878 	pthread_mutex_lock(&dev->mutex);
879 	dev->status = status;
880 	ret = dev->ops->set_status(dev, status);
881 	if (ret && ret != -ENOTSUP)
882 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend status", dev->path);
883 
884 	pthread_mutex_unlock(&dev->mutex);
885 	return ret;
886 }
887 
888 int
889 virtio_user_dev_update_status(struct virtio_user_dev *dev)
890 {
891 	int ret;
892 	uint8_t status;
893 
894 	pthread_mutex_lock(&dev->mutex);
895 
896 	ret = dev->ops->get_status(dev, &status);
897 	if (!ret) {
898 		dev->status = status;
899 		PMD_INIT_LOG(DEBUG, "Updated Device Status(0x%08x):\n"
900 			"\t-RESET: %u\n"
901 			"\t-ACKNOWLEDGE: %u\n"
902 			"\t-DRIVER: %u\n"
903 			"\t-DRIVER_OK: %u\n"
904 			"\t-FEATURES_OK: %u\n"
905 			"\t-DEVICE_NEED_RESET: %u\n"
906 			"\t-FAILED: %u",
907 			dev->status,
908 			(dev->status == VIRTIO_CONFIG_STATUS_RESET),
909 			!!(dev->status & VIRTIO_CONFIG_STATUS_ACK),
910 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER),
911 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK),
912 			!!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK),
913 			!!(dev->status & VIRTIO_CONFIG_STATUS_DEV_NEED_RESET),
914 			!!(dev->status & VIRTIO_CONFIG_STATUS_FAILED));
915 	} else if (ret != -ENOTSUP) {
916 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend status", dev->path);
917 	}
918 
919 	pthread_mutex_unlock(&dev->mutex);
920 	return ret;
921 }
922 
923 int
924 virtio_user_dev_update_link_state(struct virtio_user_dev *dev)
925 {
926 	if (dev->ops->update_link_state)
927 		return dev->ops->update_link_state(dev);
928 
929 	return 0;
930 }
931 
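/* Helper used on server-mode reconnection with packed rings: the new backend
 * starts from a fresh ring state (index 0, wrap counter 1), so the frontend
 * rings are reset as well and the Rx queues are refilled; any descriptors that
 * were in flight at disconnect time are dropped.
 */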
932 static void
933 virtio_user_dev_reset_queues_packed(struct rte_eth_dev *eth_dev)
934 {
935 	struct virtio_user_dev *dev = eth_dev->data->dev_private;
936 	struct virtio_hw *hw = &dev->hw;
937 	struct virtnet_rx *rxvq;
938 	struct virtnet_tx *txvq;
939 	uint16_t i;
940 
941 	/* Add lock to avoid queue contention. */
942 	rte_spinlock_lock(&hw->state_lock);
943 	hw->started = 0;
944 
945 	/*
946 	 * Wait for the datapath to complete before resetting the queues.
947 	 * 1 ms should be enough for the ongoing Tx/Rx functions to finish.
948 	 */
949 	rte_delay_ms(1);
950 
951 	/* Vring reset for each Tx queue and Rx queue. */
952 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
953 		rxvq = eth_dev->data->rx_queues[i];
954 		virtqueue_rxvq_reset_packed(virtnet_rxq_to_vq(rxvq));
955 		virtio_dev_rx_queue_setup_finish(eth_dev, i);
956 	}
957 
958 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
959 		txvq = eth_dev->data->tx_queues[i];
960 		virtqueue_txvq_reset_packed(virtnet_txq_to_vq(txvq));
961 	}
962 
963 	hw->started = 1;
964 	rte_spinlock_unlock(&hw->state_lock);
965 }
966 
967 void
968 virtio_user_dev_delayed_disconnect_handler(void *param)
969 {
970 	struct virtio_user_dev *dev = param;
971 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
972 
973 	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
974 		PMD_DRV_LOG(ERR, "interrupt disable failed");
975 		return;
976 	}
977 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
978 		    rte_intr_fd_get(eth_dev->intr_handle));
979 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
980 					 virtio_interrupt_handler,
981 					 eth_dev) != 1)
982 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
983 
984 	if (dev->is_server) {
985 		if (dev->ops->server_disconnect)
986 			dev->ops->server_disconnect(dev);
987 
988 		rte_intr_fd_set(eth_dev->intr_handle,
989 			dev->ops->get_intr_fd(dev));
990 
991 		PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
992 			    rte_intr_fd_get(eth_dev->intr_handle));
993 
994 		if (rte_intr_callback_register(eth_dev->intr_handle,
995 					       virtio_interrupt_handler,
996 					       eth_dev))
997 			PMD_DRV_LOG(ERR, "interrupt register failed");
998 
999 		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
1000 			PMD_DRV_LOG(ERR, "interrupt enable failed");
1001 			return;
1002 		}
1003 	}
1004 }
1005 
1006 static void
1007 virtio_user_dev_delayed_intr_reconfig_handler(void *param)
1008 {
1009 	struct virtio_user_dev *dev = param;
1010 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1011 
1012 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
1013 		    rte_intr_fd_get(eth_dev->intr_handle));
1014 
1015 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
1016 					 virtio_interrupt_handler,
1017 					 eth_dev) != 1)
1018 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
1019 
1020 	rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev));
1021 
1022 	PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
1023 		    rte_intr_fd_get(eth_dev->intr_handle));
1024 
1025 	if (rte_intr_callback_register(eth_dev->intr_handle,
1026 				       virtio_interrupt_handler, eth_dev))
1027 		PMD_DRV_LOG(ERR, "interrupt register failed");
1028 
1029 	if (rte_intr_enable(eth_dev->intr_handle) < 0)
1030 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1031 }
1032 
1033 int
1034 virtio_user_dev_server_reconnect(struct virtio_user_dev *dev)
1035 {
1036 	int ret, old_status;
1037 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1038 	struct virtio_hw *hw = &dev->hw;
1039 
1040 	if (!dev->ops->server_reconnect) {
1041 		PMD_DRV_LOG(ERR, "(%s) Missing server reconnect callback", dev->path);
1042 		return -1;
1043 	}
1044 
1045 	if (dev->ops->server_reconnect(dev)) {
1046 		PMD_DRV_LOG(ERR, "(%s) Reconnect callback call failed", dev->path);
1047 		return -1;
1048 	}
1049 
1050 	old_status = dev->status;
1051 
1052 	virtio_reset(hw);
1053 
1054 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1055 
1056 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1057 
1058 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
1059 		PMD_INIT_LOG(ERR, "get_features failed: %s",
1060 			     strerror(errno));
1061 		return -1;
1062 	}
1063 
1064 	/* Mask out features that virtio-user does not support or was configured not to use */
1065 	dev->device_features &= ~(dev->unsupported_features);
1066 
1067 	dev->features &= (dev->device_features | dev->frontend_features);
1068 
1069 	/* For packed ring, the queues must be reset during reconnection. */
1070 	if (virtio_with_packed_queue(hw) &&
1071 	   (old_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) {
1072 		PMD_INIT_LOG(NOTICE, "In-flight packets will be dropped"
1073 				" when reconnecting with packed ring.");
1074 		virtio_user_dev_reset_queues_packed(eth_dev);
1075 	}
1076 
1077 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1078 
1079 	/* Start the device */
1080 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
1081 	if (!dev->started)
1082 		return -1;
1083 
1084 	if (dev->queue_pairs > 1) {
1085 		ret = virtio_user_handle_mq(dev, dev->queue_pairs);
1086 		if (ret != 0) {
1087 			PMD_INIT_LOG(ERR, "Failed to enable multi-queue pairs!");
1088 			return -1;
1089 		}
1090 	}
1091 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1092 		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
1093 			PMD_DRV_LOG(ERR, "interrupt disable failed");
1094 			return -1;
1095 		}
1096 		/*
1097 		 * This function can be called from the interrupt handler, so
1098 		 * we can't unregister the interrupt handler here. Set an
1099 		 * alarm to do that later.
1100 		 */
1101 		rte_eal_alarm_set(1,
1102 			virtio_user_dev_delayed_intr_reconfig_handler,
1103 			(void *)dev);
1104 	}
1105 	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeded!");
1106 	return 0;
1107 }
1108