xref: /dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c (revision 66b45ceaf5dd02498e67a1fb0705441fda8742c4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <fcntl.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <sys/mman.h>
12 #include <unistd.h>
13 #include <sys/eventfd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 
17 #include <rte_alarm.h>
18 #include <rte_string_fns.h>
19 #include <rte_eal_memconfig.h>
20 
21 #include "vhost.h"
22 #include "virtio_user_dev.h"
23 #include "../virtio_ethdev.h"
24 
25 #define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"
26 
27 const char * const virtio_user_backend_strings[] = {
28 	[VIRTIO_USER_BACKEND_UNKNOWN] = "VIRTIO_USER_BACKEND_UNKNOWN",
29 	[VIRTIO_USER_BACKEND_VHOST_USER] = "VHOST_USER",
30 	[VIRTIO_USER_BACKEND_VHOST_KERNEL] = "VHOST_NET",
31 	[VIRTIO_USER_BACKEND_VHOST_VDPA] = "VHOST_VDPA",
32 };
33 
34 static int
35 virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
36 {
37 	/* Of all per-virtqueue messages, make sure VHOST_SET_VRING_CALL comes
38 	 * first, because vhost depends on this message to allocate the
39 	 * virtqueue pair.
40 	 */
41 	struct vhost_vring_file file;
42 	int ret;
43 
44 	file.index = queue_sel;
45 	file.fd = dev->callfds[queue_sel];
46 	ret = dev->ops->set_vring_call(dev, &file);
47 	if (ret < 0) {
48 		PMD_INIT_LOG(ERR, "(%s) Failed to create queue %u", dev->path, queue_sel);
49 		return -1;
50 	}
51 
52 	return 0;
53 }
54 
55 static int
56 virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
57 {
58 	int ret;
59 	struct vhost_vring_file file;
60 	struct vhost_vring_state state;
61 	struct vring *vring = &dev->vrings[queue_sel];
62 	struct vring_packed *pq_vring = &dev->packed_vrings[queue_sel];
63 	struct vhost_vring_addr addr = {
64 		.index = queue_sel,
65 		.log_guest_addr = 0,
66 		.flags = 0, /* disable log */
67 	};
68 
69 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
70 		addr.desc_user_addr =
71 			(uint64_t)(uintptr_t)pq_vring->desc;
72 		addr.avail_user_addr =
73 			(uint64_t)(uintptr_t)pq_vring->driver;
74 		addr.used_user_addr =
75 			(uint64_t)(uintptr_t)pq_vring->device;
76 	} else {
77 		addr.desc_user_addr = (uint64_t)(uintptr_t)vring->desc;
78 		addr.avail_user_addr = (uint64_t)(uintptr_t)vring->avail;
79 		addr.used_user_addr = (uint64_t)(uintptr_t)vring->used;
80 	}
81 
82 	state.index = queue_sel;
83 	state.num = vring->num;
84 	ret = dev->ops->set_vring_num(dev, &state);
85 	if (ret < 0)
86 		goto err;
87 
88 	state.index = queue_sel;
89 	state.num = 0; /* start from index 0, nothing consumed yet */
90 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
91 		state.num |= (1 << 15); /* bit 15: initial avail wrap counter */
92 	ret = dev->ops->set_vring_base(dev, &state);
93 	if (ret < 0)
94 		goto err;
95 
96 	ret = dev->ops->set_vring_addr(dev, &addr);
97 	if (ret < 0)
98 		goto err;
99 
100 	/* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK comes
101 	 * last, because vhost depends on this message to judge whether
102 	 * virtio is ready.
103 	 */
104 	file.index = queue_sel;
105 	file.fd = dev->kickfds[queue_sel];
106 	ret = dev->ops->set_vring_kick(dev, &file);
107 	if (ret < 0)
108 		goto err;
109 
110 	return 0;
111 err:
112 	PMD_INIT_LOG(ERR, "(%s) Failed to kick queue %u", dev->path, queue_sel);
113 
114 	return -1;
115 }
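/*
 * Rough summary of the per-virtqueue bring-up driven by the two helpers
 * above (the exact backend messages depend on dev->ops):
 *
 *   set_vring_call(callfd)  - sent first, in virtio_user_create_queue()
 *   set_vring_num(size)     - ring size
 *   set_vring_base(0)       - starting index (bit 15: wrap counter, packed)
 *   set_vring_addr(...)     - desc/avail/used (split) or desc/driver/device
 *   set_vring_kick(kickfd)  - sent last, the backend treats it as "ready"
 */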
116 
117 static int
118 virtio_user_queue_setup(struct virtio_user_dev *dev,
119 			int (*fn)(struct virtio_user_dev *, uint32_t))
120 {
121 	uint32_t i;
122 
123 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
124 		if (fn(dev, i) < 0) {
125 			PMD_DRV_LOG(ERR, "(%s) setup VQ %u failed", dev->path, i);
126 			return -1;
127 		}
128 	}
129 
130 	return 0;
131 }
132 
133 int
134 virtio_user_dev_set_features(struct virtio_user_dev *dev)
135 {
136 	uint64_t features;
137 	int ret = -1;
138 
139 	pthread_mutex_lock(&dev->mutex);
140 
141 	/* Step 0: tell vhost to create queues */
142 	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
143 		goto error;
144 
145 	features = dev->features;
146 
147 	/* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
148 	features &= ~(1ull << VIRTIO_NET_F_MAC);
149 	/* Strip VIRTIO_NET_F_CTRL_VQ if the device does not really support the control VQ */
150 	if (!dev->hw_cvq)
151 		features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
152 	features &= ~(1ull << VIRTIO_NET_F_STATUS);
153 	ret = dev->ops->set_features(dev, features);
154 	if (ret < 0)
155 		goto error;
156 	PMD_DRV_LOG(INFO, "(%s) set features: 0x%" PRIx64, dev->path, features);
157 error:
158 	pthread_mutex_unlock(&dev->mutex);
159 
160 	return ret;
161 }
162 
163 int
164 virtio_user_start_device(struct virtio_user_dev *dev)
165 {
166 	int ret;
167 
168 	/*
169 	 * XXX workaround!
170 	 *
171 	 * We need to make sure that the locks will be
172 	 * taken in the correct order to avoid deadlocks.
173 	 *
174 	 * Before releasing this lock, this thread should
175 	 * not trigger any memory hotplug events.
176 	 *
177 	 * This is a temporary workaround, and should be
178 	 * replaced when we get proper support from the
179 	 * memory subsystem in the future.
180 	 */
181 	rte_mcfg_mem_read_lock();
182 	pthread_mutex_lock(&dev->mutex);
183 
184 	/* Step 2: share memory regions */
185 	ret = dev->ops->set_memory_table(dev);
186 	if (ret < 0)
187 		goto error;
188 
189 	/* Step 3: kick queues */
190 	ret = virtio_user_queue_setup(dev, virtio_user_kick_queue);
191 	if (ret < 0)
192 		goto error;
193 
194 	/* Step 4: enable queues.
195 	 * Only the 1st queue pair is enabled by default.
196 	 */
197 	ret = dev->ops->enable_qp(dev, 0, 1);
198 	if (ret < 0)
199 		goto error;
200 
201 	dev->started = true;
202 
203 	pthread_mutex_unlock(&dev->mutex);
204 	rte_mcfg_mem_read_unlock();
205 
206 	return 0;
207 error:
208 	pthread_mutex_unlock(&dev->mutex);
209 	rte_mcfg_mem_read_unlock();
210 
211 	PMD_INIT_LOG(ERR, "(%s) Failed to start device", dev->path);
212 
213 	/* TODO: free resources here, or let the caller check and clean up */
214 	return -1;
215 }
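/*
 * Start sequence as implemented above: hold the EAL memory read lock so the
 * memory map cannot change underneath us, push the memory table to the
 * backend, kick every virtqueue, then enable the first queue pair. Further
 * queue pairs are only enabled later, through the control queue path
 * (virtio_user_handle_mq() below).
 */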
216 
217 int virtio_user_stop_device(struct virtio_user_dev *dev)
218 {
219 	struct vhost_vring_state state;
220 	uint32_t i;
221 	int ret;
222 
223 	pthread_mutex_lock(&dev->mutex);
224 	if (!dev->started)
225 		goto out;
226 
227 	for (i = 0; i < dev->max_queue_pairs; ++i) {
228 		ret = dev->ops->enable_qp(dev, i, 0);
229 		if (ret < 0)
230 			goto err;
231 	}
232 
233 	/* Stop the backend. */
234 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
235 		state.index = i;
236 		ret = dev->ops->get_vring_base(dev, &state);
237 		if (ret < 0) {
238 			PMD_DRV_LOG(ERR, "(%s) get_vring_base failed, index=%u", dev->path, i);
239 			goto err;
240 		}
241 	}
242 
243 	dev->started = false;
244 
245 out:
246 	pthread_mutex_unlock(&dev->mutex);
247 
248 	return 0;
249 err:
250 	pthread_mutex_unlock(&dev->mutex);
251 
252 	PMD_INIT_LOG(ERR, "(%s) Failed to stop device", dev->path);
253 
254 	return -1;
255 }
256 
257 static int
258 virtio_user_dev_init_max_queue_pairs(struct virtio_user_dev *dev, uint32_t user_max_qp)
259 {
260 	int ret;
261 
262 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MQ))) {
263 		dev->max_queue_pairs = 1;
264 		return 0;
265 	}
266 
267 	if (!dev->ops->get_config) {
268 		dev->max_queue_pairs = user_max_qp;
269 		return 0;
270 	}
271 
272 	ret = dev->ops->get_config(dev, (uint8_t *)&dev->max_queue_pairs,
273 			offsetof(struct virtio_net_config, max_virtqueue_pairs),
274 			sizeof(uint16_t));
275 	if (ret) {
276 		/*
277 		 * We need to know the max number of queue pairs from the device so that
278 		 * the control queue gets the right index.
279 		 */
280 		dev->max_queue_pairs = 1;
281 		PMD_DRV_LOG(ERR, "(%s) Failed to get max queue pairs from device", dev->path);
282 
283 		return ret;
284 	}
285 
286 	if (dev->max_queue_pairs > VIRTIO_MAX_VIRTQUEUE_PAIRS) {
287 		/*
288 		 * If the device supports a control queue, its index is
289 		 * max_virtqueue_pairs * 2. Disable MQ if that exceeds what we support.
290 		 */
291 		PMD_DRV_LOG(ERR, "(%s) Device advertises too many queues (%u, max supported %u)",
292 				dev->path, dev->max_queue_pairs, VIRTIO_MAX_VIRTQUEUE_PAIRS);
293 		dev->max_queue_pairs = 1;
294 
295 		return -1;
296 	}
297 
298 	return 0;
299 }
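/*
 * The 2-byte get_config() read above relies on the standard virtio-net
 * config space layout, where the 16-bit max_virtqueue_pairs field follows
 * the 6-byte MAC and the 16-bit status field in struct virtio_net_config.
 */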
300 
301 int
302 virtio_user_dev_set_mac(struct virtio_user_dev *dev)
303 {
304 	int ret = 0;
305 
306 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
307 		return -ENOTSUP;
308 
309 	if (!dev->ops->set_config)
310 		return -ENOTSUP;
311 
312 	ret = dev->ops->set_config(dev, dev->mac_addr,
313 			offsetof(struct virtio_net_config, mac),
314 			RTE_ETHER_ADDR_LEN);
315 	if (ret)
316 		PMD_DRV_LOG(ERR, "(%s) Failed to set MAC address in device", dev->path);
317 
318 	return ret;
319 }
320 
321 int
322 virtio_user_dev_get_mac(struct virtio_user_dev *dev)
323 {
324 	int ret = 0;
325 
326 	if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC)))
327 		return -ENOTSUP;
328 
329 	if (!dev->ops->get_config)
330 		return -ENOTSUP;
331 
332 	ret = dev->ops->get_config(dev, dev->mac_addr,
333 			offsetof(struct virtio_net_config, mac),
334 			RTE_ETHER_ADDR_LEN);
335 	if (ret)
336 		PMD_DRV_LOG(ERR, "(%s) Failed to get MAC address from device", dev->path);
337 
338 	return ret;
339 }
340 
341 static void
342 virtio_user_dev_init_mac(struct virtio_user_dev *dev, const char *mac)
343 {
344 	struct rte_ether_addr cmdline_mac;
345 	char buf[RTE_ETHER_ADDR_FMT_SIZE];
346 	int ret;
347 
348 	if (mac && rte_ether_unformat_addr(mac, &cmdline_mac) == 0) {
349 		/*
350 		 * A MAC address was passed on the command line; try to store
351 		 * it in the device if the device supports it. Otherwise, use
352 		 * the device's own address.
353 		 */
354 		memcpy(dev->mac_addr, &cmdline_mac, RTE_ETHER_ADDR_LEN);
355 		dev->mac_specified = 1;
356 
357 		/* Setting the MAC may fail; in that case, fall back to the device's MAC */
358 		virtio_user_dev_set_mac(dev);
359 		ret = virtio_user_dev_get_mac(dev);
360 		if (ret == -ENOTSUP)
361 			goto out;
362 
363 		if (memcmp(&cmdline_mac, dev->mac_addr, RTE_ETHER_ADDR_LEN))
364 			PMD_DRV_LOG(INFO, "(%s) Device MAC update failed", dev->path);
365 	} else {
366 		ret = virtio_user_dev_get_mac(dev);
367 		if (ret) {
368 			PMD_DRV_LOG(ERR, "(%s) No valid MAC in devargs or device, use random",
369 					dev->path);
370 			return;
371 		}
372 
373 		dev->mac_specified = 1;
374 	}
375 out:
376 	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE,
377 			(struct rte_ether_addr *)dev->mac_addr);
378 	PMD_DRV_LOG(INFO, "(%s) MAC %s specified", dev->path, buf);
379 }
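/*
 * MAC selection order implemented above: a valid devargs MAC is pushed to
 * the device (best effort) and read back; otherwise the device's own MAC is
 * used when the backend exposes one; if neither is available, mac_specified
 * stays 0 and a random address is used instead, as the log above notes.
 */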
380 
381 static int
382 virtio_user_dev_init_notify(struct virtio_user_dev *dev)
383 {
384 	uint32_t i, j;
385 	int callfd;
386 	int kickfd;
387 
388 	for (i = 0; i < dev->max_queue_pairs * 2; i++) {
389 		/* We could pass an invalid fd here, but some backends use the
390 		 * kickfd and callfd as criteria to judge whether the device is
391 		 * alive, so we use real eventfds.
392 		 */
393 		callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
394 		if (callfd < 0) {
395 			PMD_DRV_LOG(ERR, "(%s) callfd error, %s", dev->path, strerror(errno));
396 			goto err;
397 		}
398 		kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
399 		if (kickfd < 0) {
400 			close(callfd);
401 			PMD_DRV_LOG(ERR, "(%s) kickfd error, %s", dev->path, strerror(errno));
402 			goto err;
403 		}
404 		dev->callfds[i] = callfd;
405 		dev->kickfds[i] = kickfd;
406 	}
407 
408 	return 0;
409 err:
410 	for (j = 0; j < i; j++) {
411 		if (dev->kickfds[j] >= 0) {
412 			close(dev->kickfds[j]);
413 			dev->kickfds[j] = -1;
414 		}
415 		if (dev->callfds[j] >= 0) {
416 			close(dev->callfds[j]);
417 			dev->callfds[j] = -1;
418 		}
419 	}
420 
421 	return -1;
422 }
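/*
 * Each virtqueue gets two eventfds: the callfd is handed to the backend so
 * it can signal used buffers back to us, while the kickfd is what we write
 * to (see virtio_user_control_queue_notify() below) to notify the backend
 * of newly available buffers.
 */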
423 
424 static void
425 virtio_user_dev_uninit_notify(struct virtio_user_dev *dev)
426 {
427 	uint32_t i;
428 
429 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
430 		if (dev->kickfds[i] >= 0) {
431 			close(dev->kickfds[i]);
432 			dev->kickfds[i] = -1;
433 		}
434 		if (dev->callfds[i] >= 0) {
435 			close(dev->callfds[i]);
436 			dev->callfds[i] = -1;
437 		}
438 	}
439 }
440 
441 static int
442 virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
443 {
444 	uint32_t i;
445 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
446 
447 	if (eth_dev->intr_handle == NULL) {
448 		eth_dev->intr_handle =
449 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
450 		if (eth_dev->intr_handle == NULL) {
451 			PMD_DRV_LOG(ERR, "(%s) failed to allocate intr_handle", dev->path);
452 			return -1;
453 		}
454 	}
455 
456 	for (i = 0; i < dev->max_queue_pairs; ++i) {
457 		if (rte_intr_efds_index_set(eth_dev->intr_handle, i,
458 				dev->callfds[2 * i + VTNET_SQ_RQ_QUEUE_IDX]))
459 			return -rte_errno;
460 	}
461 
462 	if (rte_intr_nb_efd_set(eth_dev->intr_handle, dev->max_queue_pairs))
463 		return -rte_errno;
464 
465 	if (rte_intr_max_intr_set(eth_dev->intr_handle,
466 			dev->max_queue_pairs + 1))
467 		return -rte_errno;
468 
469 	if (rte_intr_type_set(eth_dev->intr_handle, RTE_INTR_HANDLE_VDEV))
470 		return -rte_errno;
471 
472 	/* For a virtio vdev, there is no need to read the counter to clear events */
473 	if (rte_intr_efd_counter_size_set(eth_dev->intr_handle, 0))
474 		return -rte_errno;
475 
476 	if (rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev)))
477 		return -rte_errno;
478 
479 	return 0;
480 }
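/*
 * The Rx-side callfds are exported as per-queue interrupt event fds (one
 * per queue pair), while the backend fd returned by get_intr_fd() becomes
 * the main interrupt fd; the reconnection handlers further below re-set and
 * re-register that fd when the backend connection changes.
 */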
481 
482 static void
483 virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
484 			 const void *addr,
485 			 size_t len __rte_unused,
486 			 void *arg)
487 {
488 	struct virtio_user_dev *dev = arg;
489 	struct rte_memseg_list *msl;
490 	uint16_t i;
491 	int ret = 0;
492 
493 	/* ignore externally allocated memory */
494 	msl = rte_mem_virt2memseg_list(addr);
495 	if (msl->external)
496 		return;
497 
498 	pthread_mutex_lock(&dev->mutex);
499 
500 	if (dev->started == false)
501 		goto exit;
502 
503 	/* Step 1: pause the active queues */
504 	for (i = 0; i < dev->queue_pairs; i++) {
505 		ret = dev->ops->enable_qp(dev, i, 0);
506 		if (ret < 0)
507 			goto exit;
508 	}
509 
510 	/* Step 2: update memory regions */
511 	ret = dev->ops->set_memory_table(dev);
512 	if (ret < 0)
513 		goto exit;
514 
515 	/* Step 3: resume the active queues */
516 	for (i = 0; i < dev->queue_pairs; i++) {
517 		ret = dev->ops->enable_qp(dev, i, 1);
518 		if (ret < 0)
519 			goto exit;
520 	}
521 
522 exit:
523 	pthread_mutex_unlock(&dev->mutex);
524 
525 	if (ret < 0)
526 		PMD_DRV_LOG(ERR, "(%s) Failed to update memory table", dev->path);
527 }
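/*
 * Memory hotplug handling above follows a pause/update/resume pattern:
 * disable every active queue pair, push the updated memory table to the
 * backend, then re-enable the queue pairs. Nothing is done before the
 * device is started, since virtio_user_start_device() sets the initial
 * memory table under the same memory read lock.
 */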
528 
529 static int
530 virtio_user_dev_setup(struct virtio_user_dev *dev)
531 {
532 	if (dev->is_server) {
533 		if (dev->backend_type != VIRTIO_USER_BACKEND_VHOST_USER) {
534 			PMD_DRV_LOG(ERR, "Server mode only supports vhost-user!");
535 			return -1;
536 		}
537 	}
538 
539 	switch (dev->backend_type) {
540 	case VIRTIO_USER_BACKEND_VHOST_USER:
541 		dev->ops = &virtio_ops_user;
542 		break;
543 	case VIRTIO_USER_BACKEND_VHOST_KERNEL:
544 		dev->ops = &virtio_ops_kernel;
545 		break;
546 	case VIRTIO_USER_BACKEND_VHOST_VDPA:
547 		dev->ops = &virtio_ops_vdpa;
548 		break;
549 	default:
550 		PMD_DRV_LOG(ERR, "(%s) Unknown backend type", dev->path);
551 		return -1;
552 	}
553 
554 	if (dev->ops->setup(dev) < 0) {
555 		PMD_INIT_LOG(ERR, "(%s) Failed to setup backend", dev->path);
556 		return -1;
557 	}
558 
559 	return 0;
560 }
561 
562 /* Use the macro below to filter the features offered by the vhost backend */
563 #define VIRTIO_USER_SUPPORTED_FEATURES			\
564 	(1ULL << VIRTIO_NET_F_MAC		|	\
565 	 1ULL << VIRTIO_NET_F_STATUS		|	\
566 	 1ULL << VIRTIO_NET_F_MQ		|	\
567 	 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR	|	\
568 	 1ULL << VIRTIO_NET_F_CTRL_VQ		|	\
569 	 1ULL << VIRTIO_NET_F_CTRL_RX		|	\
570 	 1ULL << VIRTIO_NET_F_CTRL_VLAN		|	\
571 	 1ULL << VIRTIO_NET_F_CSUM		|	\
572 	 1ULL << VIRTIO_NET_F_HOST_TSO4		|	\
573 	 1ULL << VIRTIO_NET_F_HOST_TSO6		|	\
574 	 1ULL << VIRTIO_NET_F_MRG_RXBUF		|	\
575 	 1ULL << VIRTIO_RING_F_INDIRECT_DESC	|	\
576 	 1ULL << VIRTIO_NET_F_GUEST_CSUM	|	\
577 	 1ULL << VIRTIO_NET_F_GUEST_TSO4	|	\
578 	 1ULL << VIRTIO_NET_F_GUEST_TSO6	|	\
579 	 1ULL << VIRTIO_F_IN_ORDER		|	\
580 	 1ULL << VIRTIO_F_VERSION_1		|	\
581 	 1ULL << VIRTIO_F_RING_PACKED)
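/*
 * During init, this mask is combined with whatever the backend reports to
 * build the set of feature bits that must never be negotiated, roughly:
 *
 *   dev->unsupported_features =
 *           ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features);
 *
 * Devargs such as mrg_rxbuf=0 or packed_vq=0 then add individual bits to
 * this mask before it is applied to both device and frontend features.
 */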
582 
583 int
584 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues,
585 		     int cq, int queue_size, const char *mac, char **ifname,
586 		     int server, int mrg_rxbuf, int in_order, int packed_vq,
587 		     enum virtio_user_backend_type backend_type)
588 {
589 	uint64_t backend_features;
590 	int i;
591 
592 	pthread_mutex_init(&dev->mutex, NULL);
593 	strlcpy(dev->path, path, PATH_MAX);
594 
595 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
596 		dev->kickfds[i] = -1;
597 		dev->callfds[i] = -1;
598 	}
599 
600 	dev->started = 0;
601 	dev->queue_pairs = 1; /* mq disabled by default */
602 	dev->queue_size = queue_size;
603 	dev->is_server = server;
604 	dev->mac_specified = 0;
605 	dev->frontend_features = 0;
606 	dev->unsupported_features = 0;
607 	dev->backend_type = backend_type;
608 
609 	if (*ifname) {
610 		dev->ifname = *ifname;
611 		*ifname = NULL;
612 	}
613 
614 	if (virtio_user_dev_setup(dev) < 0) {
615 		PMD_INIT_LOG(ERR, "(%s) backend setup failed", dev->path);
616 		return -1;
617 	}
618 
619 	if (dev->ops->set_owner(dev) < 0) {
620 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend owner", dev->path);
621 		goto destroy;
622 	}
623 
624 	if (dev->ops->get_backend_features(&backend_features) < 0) {
625 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend features", dev->path);
626 		goto destroy;
627 	}
628 
629 	dev->unsupported_features = ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features);
630 
631 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
632 		PMD_INIT_LOG(ERR, "(%s) Failed to get device features", dev->path);
633 		goto destroy;
634 	}
635 
636 	virtio_user_dev_init_mac(dev, mac);
637 
638 	if (virtio_user_dev_init_max_queue_pairs(dev, queues))
639 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
640 
641 	if (dev->max_queue_pairs > 1)
642 		cq = 1;
643 
644 	if (virtio_user_dev_init_notify(dev) < 0) {
645 		PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path);
646 		goto destroy;
647 	}
648 
649 	if (virtio_user_fill_intr_handle(dev) < 0) {
650 		PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path);
651 		goto notify_uninit;
652 	}
653 
654 	if (!mrg_rxbuf)
655 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
656 
657 	if (!in_order)
658 		dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
659 
660 	if (!packed_vq)
661 		dev->unsupported_features |= (1ull << VIRTIO_F_RING_PACKED);
662 
663 	if (dev->mac_specified)
664 		dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC);
665 	else
666 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
667 
668 	if (cq) {
669 		/* The device does not really need to know anything about the CQ,
670 		 * so if necessary, we just claim to support the CQ.
671 		 */
672 		dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
673 	} else {
674 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
675 		/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
676 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
677 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
678 		dev->unsupported_features |=
679 			(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
680 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
681 		dev->unsupported_features |=
682 			(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
683 	}
684 
685 	/* The backend will not report this feature, so we add it explicitly */
686 	if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER)
687 		dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
688 
689 	dev->frontend_features &= ~dev->unsupported_features;
690 	dev->device_features &= ~dev->unsupported_features;
691 
692 	if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
693 				virtio_user_mem_event_cb, dev)) {
694 		if (rte_errno != ENOTSUP) {
695 			PMD_INIT_LOG(ERR, "(%s) Failed to register mem event callback",
696 					dev->path);
697 			goto notify_uninit;
698 		}
699 	}
700 
701 	return 0;
702 
703 notify_uninit:
704 	virtio_user_dev_uninit_notify(dev);
705 destroy:
706 	dev->ops->destroy(dev);
707 
708 	return -1;
709 }
710 
711 void
712 virtio_user_dev_uninit(struct virtio_user_dev *dev)
713 {
714 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
715 
716 	rte_intr_instance_free(eth_dev->intr_handle);
717 	eth_dev->intr_handle = NULL;
718 
719 	virtio_user_stop_device(dev);
720 
721 	rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);
722 
723 	virtio_user_dev_uninit_notify(dev);
724 
725 	free(dev->ifname);
726 
727 	if (dev->is_server)
728 		unlink(dev->path);
729 
730 	dev->ops->destroy(dev);
731 }
732 
733 uint8_t
734 virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
735 {
736 	uint16_t i;
737 	uint8_t ret = 0;
738 
739 	if (q_pairs > dev->max_queue_pairs) {
740 		PMD_INIT_LOG(ERR, "(%s) multi-q config %u, but only %u supported",
741 			     dev->path, q_pairs, dev->max_queue_pairs);
742 		return -1;
743 	}
744 
745 	for (i = 0; i < q_pairs; ++i)
746 		ret |= dev->ops->enable_qp(dev, i, 1);
747 	for (i = q_pairs; i < dev->max_queue_pairs; ++i)
748 		ret |= dev->ops->enable_qp(dev, i, 0);
749 
750 	dev->queue_pairs = q_pairs;
751 
752 	return ret;
753 }
754 
755 static uint32_t
756 virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
757 			    uint16_t idx_hdr)
758 {
759 	struct virtio_net_ctrl_hdr *hdr;
760 	virtio_net_ctrl_ack status = ~0;
761 	uint16_t i, idx_data, idx_status;
762 	uint32_t n_descs = 0;
763 
764 	/* locate desc for header, data, and status */
765 	idx_data = vring->desc[idx_hdr].next;
766 	n_descs++;
767 
768 	i = idx_data;
769 	while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
770 		i = vring->desc[i].next;
771 		n_descs++;
772 	}
773 
774 	/* locate desc for status */
775 	idx_status = i;
776 	n_descs++;
777 
778 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
779 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
780 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
781 		uint16_t queues;
782 
783 		queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
784 		status = virtio_user_handle_mq(dev, queues);
785 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
786 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
787 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
788 		status = 0;
789 	}
790 
791 	/* Update status */
792 	*(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
793 
794 	return n_descs;
795 }
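/*
 * The split-ring control message handled above is expected to be the usual
 * virtio-net control chain: a header descriptor carrying class/cmd, optional
 * data descriptors chained via VRING_DESC_F_NEXT, and a final write-only
 * descriptor into which the one-byte ack status is written back.
 */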
796 
797 static inline int
798 desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
799 {
800 	uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
801 
802 	return wrap_counter == !!(flags & VRING_PACKED_DESC_F_AVAIL) &&
803 		wrap_counter != !!(flags & VRING_PACKED_DESC_F_USED);
804 }
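/*
 * Packed-ring availability check: a descriptor belongs to the driver's
 * current pass over the ring when its AVAIL flag matches the expected wrap
 * counter and its USED flag does not. For example, with wrap_counter == 1,
 * AVAIL=1/USED=0 means available while AVAIL=1/USED=1 means already used.
 * The acquire load here pairs with the release store of the flags in
 * virtio_user_handle_cq_packed() below.
 */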
805 
806 static uint32_t
807 virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev,
808 				   struct vring_packed *vring,
809 				   uint16_t idx_hdr)
810 {
811 	struct virtio_net_ctrl_hdr *hdr;
812 	virtio_net_ctrl_ack status = ~0;
813 	uint16_t idx_data, idx_status;
814 	/* initialize to one, header is first */
815 	uint32_t n_descs = 1;
816 
817 	/* locate desc for header, data, and status */
818 	idx_data = idx_hdr + 1;
819 	if (idx_data >= dev->queue_size)
820 		idx_data -= dev->queue_size;
821 
822 	n_descs++;
823 
824 	idx_status = idx_data;
825 	while (vring->desc[idx_status].flags & VRING_DESC_F_NEXT) {
826 		idx_status++;
827 		if (idx_status >= dev->queue_size)
828 			idx_status -= dev->queue_size;
829 		n_descs++;
830 	}
831 
832 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
833 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
834 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
835 		uint16_t queues;
836 
837 		queues = *(uint16_t *)(uintptr_t)
838 				vring->desc[idx_data].addr;
839 		status = virtio_user_handle_mq(dev, queues);
840 	} else if (hdr->class == VIRTIO_NET_CTRL_RX  ||
841 		   hdr->class == VIRTIO_NET_CTRL_MAC ||
842 		   hdr->class == VIRTIO_NET_CTRL_VLAN) {
843 		status = 0;
844 	}
845 
846 	/* Update status */
847 	*(virtio_net_ctrl_ack *)(uintptr_t)
848 		vring->desc[idx_status].addr = status;
849 
850 	/* Update used descriptor */
851 	vring->desc[idx_hdr].id = vring->desc[idx_status].id;
852 	vring->desc[idx_hdr].len = sizeof(status);
853 
854 	return n_descs;
855 }
856 
857 void
858 virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx)
859 {
860 	struct virtio_user_queue *vq = &dev->packed_queues[queue_idx];
861 	struct vring_packed *vring = &dev->packed_vrings[queue_idx];
862 	uint16_t n_descs, flags;
863 
864 	/* Perform a load-acquire barrier in desc_is_avail to
865 	 * enforce the ordering between desc flags and desc
866 	 * content.
867 	 */
868 	while (desc_is_avail(&vring->desc[vq->used_idx],
869 			     vq->used_wrap_counter)) {
870 
871 		n_descs = virtio_user_handle_ctrl_msg_packed(dev, vring,
872 				vq->used_idx);
873 
874 		flags = VRING_DESC_F_WRITE;
875 		if (vq->used_wrap_counter)
876 			flags |= VRING_PACKED_DESC_F_AVAIL_USED;
877 
878 		__atomic_store_n(&vring->desc[vq->used_idx].flags, flags,
879 				 __ATOMIC_RELEASE);
880 
881 		vq->used_idx += n_descs;
882 		if (vq->used_idx >= dev->queue_size) {
883 			vq->used_idx -= dev->queue_size;
884 			vq->used_wrap_counter ^= 1;
885 		}
886 	}
887 }
888 
889 void
890 virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
891 {
892 	uint16_t avail_idx, desc_idx;
893 	struct vring_used_elem *uep;
894 	uint32_t n_descs;
895 	struct vring *vring = &dev->vrings[queue_idx];
896 
897 	/* Consume the avail ring, using the used ring index as the next entry to process */
898 	while (__atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
899 	       != vring->avail->idx) {
900 		avail_idx = __atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
901 			    & (vring->num - 1);
902 		desc_idx = vring->avail->ring[avail_idx];
903 
904 		n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
905 
906 		/* Update used ring */
907 		uep = &vring->used->ring[avail_idx];
908 		uep->id = desc_idx;
909 		uep->len = n_descs;
910 
911 		__atomic_add_fetch(&vring->used->idx, 1, __ATOMIC_RELAXED);
912 	}
913 }
914 
915 static void
916 virtio_user_control_queue_notify(struct virtqueue *vq, void *cookie)
917 {
918 	struct virtio_user_dev *dev = cookie;
919 	uint64_t buf = 1;
920 
921 	if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0)
922 		PMD_DRV_LOG(ERR, "failed to kick backend: %s",
923 			    strerror(errno));
924 }
925 
926 int
927 virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq)
928 {
929 	char name[VIRTQUEUE_MAX_NAME_SZ];
930 	struct virtqueue *scvq;
931 
932 	snprintf(name, sizeof(name), "port%d_shadow_cvq", vq->hw->port_id);
933 	scvq = virtqueue_alloc(&dev->hw, vq->vq_queue_index, vq->vq_nentries,
934 			VTNET_CQ, SOCKET_ID_ANY, name);
935 	if (!scvq) {
936 		PMD_INIT_LOG(ERR, "(%s) Failed to alloc shadow control vq", dev->path);
937 		return -ENOMEM;
938 	}
939 
940 	scvq->cq.notify_queue = &virtio_user_control_queue_notify;
941 	scvq->cq.notify_cookie = dev;
942 	dev->scvq = scvq;
943 
944 	return 0;
945 }
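/*
 * The shadow control queue relays control commands to the backend instead
 * of emulating them locally: its notify hook (virtio_user_control_queue_notify()
 * above) writes the queue's kickfd, so a backend that exposes its own
 * control queue (see the hw_cvq check in virtio_user_dev_set_features())
 * can process the commands itself.
 */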
946 
947 void
948 virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev)
949 {
950 	if (!dev->scvq)
951 		return;
952 
953 	virtqueue_free(dev->scvq);
954 	dev->scvq = NULL;
955 }
956 
957 int
958 virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status)
959 {
960 	int ret;
961 
962 	pthread_mutex_lock(&dev->mutex);
963 	dev->status = status;
964 	ret = dev->ops->set_status(dev, status);
965 	if (ret && ret != -ENOTSUP)
966 		PMD_INIT_LOG(ERR, "(%s) Failed to set backend status", dev->path);
967 
968 	pthread_mutex_unlock(&dev->mutex);
969 	return ret;
970 }
971 
972 int
973 virtio_user_dev_update_status(struct virtio_user_dev *dev)
974 {
975 	int ret;
976 	uint8_t status;
977 
978 	pthread_mutex_lock(&dev->mutex);
979 
980 	ret = dev->ops->get_status(dev, &status);
981 	if (!ret) {
982 		dev->status = status;
983 		PMD_INIT_LOG(DEBUG, "Updated Device Status (0x%08x):\n"
984 			"\t-RESET: %u\n"
985 			"\t-ACKNOWLEDGE: %u\n"
986 			"\t-DRIVER: %u\n"
987 			"\t-DRIVER_OK: %u\n"
988 			"\t-FEATURES_OK: %u\n"
989 			"\t-DEVICE_NEED_RESET: %u\n"
990 			"\t-FAILED: %u",
991 			dev->status,
992 			(dev->status == VIRTIO_CONFIG_STATUS_RESET),
993 			!!(dev->status & VIRTIO_CONFIG_STATUS_ACK),
994 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER),
995 			!!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK),
996 			!!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK),
997 			!!(dev->status & VIRTIO_CONFIG_STATUS_DEV_NEED_RESET),
998 			!!(dev->status & VIRTIO_CONFIG_STATUS_FAILED));
999 	} else if (ret != -ENOTSUP) {
1000 		PMD_INIT_LOG(ERR, "(%s) Failed to get backend status", dev->path);
1001 	}
1002 
1003 	pthread_mutex_unlock(&dev->mutex);
1004 	return ret;
1005 }
1006 
1007 int
1008 virtio_user_dev_update_link_state(struct virtio_user_dev *dev)
1009 {
1010 	if (dev->ops->update_link_state)
1011 		return dev->ops->update_link_state(dev);
1012 
1013 	return 0;
1014 }
1015 
1016 static void
1017 virtio_user_dev_reset_queues_packed(struct rte_eth_dev *eth_dev)
1018 {
1019 	struct virtio_user_dev *dev = eth_dev->data->dev_private;
1020 	struct virtio_hw *hw = &dev->hw;
1021 	struct virtnet_rx *rxvq;
1022 	struct virtnet_tx *txvq;
1023 	uint16_t i;
1024 
1025 	/* Take the lock to avoid contention with the datapath on the queues. */
1026 	rte_spinlock_lock(&hw->state_lock);
1027 	hw->started = 0;
1028 
1029 	/*
1030 	 * Wait for the datapath to complete before resetting the queues.
1031 	 * 1 ms should be enough for the ongoing Tx/Rx functions to finish.
1032 	 */
1033 	rte_delay_ms(1);
1034 
1035 	/* Vring reset for each Tx queue and Rx queue. */
1036 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
1037 		rxvq = eth_dev->data->rx_queues[i];
1038 		virtqueue_rxvq_reset_packed(virtnet_rxq_to_vq(rxvq));
1039 		virtio_dev_rx_queue_setup_finish(eth_dev, i);
1040 	}
1041 
1042 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
1043 		txvq = eth_dev->data->tx_queues[i];
1044 		virtqueue_txvq_reset_packed(virtnet_txq_to_vq(txvq));
1045 	}
1046 
1047 	hw->started = 1;
1048 	rte_spinlock_unlock(&hw->state_lock);
1049 }
1050 
1051 void
1052 virtio_user_dev_delayed_disconnect_handler(void *param)
1053 {
1054 	struct virtio_user_dev *dev = param;
1055 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1056 
1057 	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
1058 		PMD_DRV_LOG(ERR, "interrupt disable failed");
1059 		return;
1060 	}
1061 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
1062 		    rte_intr_fd_get(eth_dev->intr_handle));
1063 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
1064 					 virtio_interrupt_handler,
1065 					 eth_dev) != 1)
1066 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
1067 
1068 	if (dev->is_server) {
1069 		if (dev->ops->server_disconnect)
1070 			dev->ops->server_disconnect(dev);
1071 
1072 		rte_intr_fd_set(eth_dev->intr_handle,
1073 			dev->ops->get_intr_fd(dev));
1074 
1075 		PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
1076 			    rte_intr_fd_get(eth_dev->intr_handle));
1077 
1078 		if (rte_intr_callback_register(eth_dev->intr_handle,
1079 					       virtio_interrupt_handler,
1080 					       eth_dev))
1081 			PMD_DRV_LOG(ERR, "interrupt register failed");
1082 
1083 		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
1084 			PMD_DRV_LOG(ERR, "interrupt enable failed");
1085 			return;
1086 		}
1087 	}
1088 }
1089 
1090 static void
1091 virtio_user_dev_delayed_intr_reconfig_handler(void *param)
1092 {
1093 	struct virtio_user_dev *dev = param;
1094 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1095 
1096 	PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d",
1097 		    rte_intr_fd_get(eth_dev->intr_handle));
1098 
1099 	if (rte_intr_callback_unregister(eth_dev->intr_handle,
1100 					 virtio_interrupt_handler,
1101 					 eth_dev) != 1)
1102 		PMD_DRV_LOG(ERR, "interrupt unregister failed");
1103 
1104 	rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev));
1105 
1106 	PMD_DRV_LOG(DEBUG, "Registering intr fd: %d",
1107 		    rte_intr_fd_get(eth_dev->intr_handle));
1108 
1109 	if (rte_intr_callback_register(eth_dev->intr_handle,
1110 				       virtio_interrupt_handler, eth_dev))
1111 		PMD_DRV_LOG(ERR, "interrupt register failed");
1112 
1113 	if (rte_intr_enable(eth_dev->intr_handle) < 0)
1114 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1115 }
1116 
1117 int
1118 virtio_user_dev_server_reconnect(struct virtio_user_dev *dev)
1119 {
1120 	int ret, old_status;
1121 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id];
1122 	struct virtio_hw *hw = &dev->hw;
1123 
1124 	if (!dev->ops->server_reconnect) {
1125 		PMD_DRV_LOG(ERR, "(%s) Missing server reconnect callback", dev->path);
1126 		return -1;
1127 	}
1128 
1129 	if (dev->ops->server_reconnect(dev)) {
1130 		PMD_DRV_LOG(ERR, "(%s) Reconnect callback call failed", dev->path);
1131 		return -1;
1132 	}
1133 
1134 	old_status = dev->status;
1135 
1136 	virtio_reset(hw);
1137 
1138 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1139 
1140 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1141 
1142 	if (dev->ops->get_features(dev, &dev->device_features) < 0) {
1143 		PMD_INIT_LOG(ERR, "get_features failed: %s",
1144 			     strerror(errno));
1145 		return -1;
1146 	}
1147 
1148 	/* Mask out the features the vhost-user backend does not support */
1149 	dev->device_features &= ~(dev->unsupported_features);
1150 
1151 	dev->features &= (dev->device_features | dev->frontend_features);
1152 
1153 	/* For packed ring, resetting queues is required in reconnection. */
1154 	if (virtio_with_packed_queue(hw) &&
1155 	   (old_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) {
1156 		PMD_INIT_LOG(NOTICE, "Packets in flight will be dropped"
1157 				" when reconnecting with packed ring.");
1158 		virtio_user_dev_reset_queues_packed(eth_dev);
1159 	}
1160 
1161 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1162 
1163 	/* Start the device */
1164 	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
1165 	if (!dev->started)
1166 		return -1;
1167 
1168 	if (dev->queue_pairs > 1) {
1169 		ret = virtio_user_handle_mq(dev, dev->queue_pairs);
1170 		if (ret != 0) {
1171 			PMD_INIT_LOG(ERR, "Failed to enable multi-queue pairs!");
1172 			return -1;
1173 		}
1174 	}
1175 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1176 		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
1177 			PMD_DRV_LOG(ERR, "interrupt disable failed");
1178 			return -1;
1179 		}
1180 		/*
1181 		 * This function can be called from the interrupt handler, so
1182 		 * we can't unregister the interrupt handler here. Set an
1183 		 * alarm to do that later.
1184 		 */
1185 		rte_eal_alarm_set(1,
1186 			virtio_user_dev_delayed_intr_reconfig_handler,
1187 			(void *)dev);
1188 	}
1189 	PMD_INIT_LOG(NOTICE, "Server mode virtio-user reconnection succeeded!");
1190 	return 0;
1191 }
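/*
 * Reconnection (server mode) above roughly replays the normal init path on
 * the fresh backend connection: reset, re-negotiate features against the
 * remembered unsupported mask, reset packed queues if the previous session
 * had reached DRIVER_OK, move back to FEATURES_OK/DRIVER_OK, re-apply the
 * multi-queue configuration, and finally re-arm the LSC interrupt from an
 * alarm callback, since this code may run inside the interrupt handler.
 */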
1192