xref: /dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c (revision 34f3966c7f81f947e9eebb347dec6a9f68eec4e6)
15566a3e3SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
25566a3e3SBruce Richardson  * Copyright(c) 2010-2016 Intel Corporation
337a7eb2aSJianfeng Tan  */
437a7eb2aSJianfeng Tan 
537a7eb2aSJianfeng Tan #include <stdint.h>
637a7eb2aSJianfeng Tan #include <stdio.h>
737a7eb2aSJianfeng Tan #include <fcntl.h>
837a7eb2aSJianfeng Tan #include <string.h>
937a7eb2aSJianfeng Tan #include <errno.h>
1037a7eb2aSJianfeng Tan #include <sys/mman.h>
1137a7eb2aSJianfeng Tan #include <unistd.h>
1237a7eb2aSJianfeng Tan #include <sys/eventfd.h>
1333d24d65SJianfeng Tan #include <sys/types.h>
1433d24d65SJianfeng Tan #include <sys/stat.h>
1537a7eb2aSJianfeng Tan 
167ff26957STiwei Bie #include <rte_eal_memconfig.h>
177ff26957STiwei Bie 
1837a7eb2aSJianfeng Tan #include "vhost.h"
1937a7eb2aSJianfeng Tan #include "virtio_user_dev.h"
2037a7eb2aSJianfeng Tan #include "../virtio_ethdev.h"
2137a7eb2aSJianfeng Tan 
2212ecb2f6SMaxime Coquelin #define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"
2312ecb2f6SMaxime Coquelin 
2437a7eb2aSJianfeng Tan static int
2557ae79a7SJianfeng Tan virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
2657ae79a7SJianfeng Tan {
2757ae79a7SJianfeng Tan 	/* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come
2857ae79a7SJianfeng Tan 	 * firstly because vhost depends on this msg to allocate virtqueue
2957ae79a7SJianfeng Tan 	 * pair.
3057ae79a7SJianfeng Tan 	 */
3157ae79a7SJianfeng Tan 	struct vhost_vring_file file;
3257ae79a7SJianfeng Tan 
3357ae79a7SJianfeng Tan 	file.index = queue_sel;
34e6e7ad8bSJianfeng Tan 	file.fd = dev->callfds[queue_sel];
3533d24d65SJianfeng Tan 	dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file);
3657ae79a7SJianfeng Tan 
3757ae79a7SJianfeng Tan 	return 0;
3857ae79a7SJianfeng Tan }
3957ae79a7SJianfeng Tan 
4057ae79a7SJianfeng Tan static int
4137a7eb2aSJianfeng Tan virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
4237a7eb2aSJianfeng Tan {
4337a7eb2aSJianfeng Tan 	struct vhost_vring_file file;
4437a7eb2aSJianfeng Tan 	struct vhost_vring_state state;
4537a7eb2aSJianfeng Tan 	struct vring *vring = &dev->vrings[queue_sel];
4637a7eb2aSJianfeng Tan 	struct vhost_vring_addr addr = {
4737a7eb2aSJianfeng Tan 		.index = queue_sel,
4837a7eb2aSJianfeng Tan 		.desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
4937a7eb2aSJianfeng Tan 		.avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
5037a7eb2aSJianfeng Tan 		.used_user_addr = (uint64_t)(uintptr_t)vring->used,
5137a7eb2aSJianfeng Tan 		.log_guest_addr = 0,
5237a7eb2aSJianfeng Tan 		.flags = 0, /* disable log */
5337a7eb2aSJianfeng Tan 	};
5437a7eb2aSJianfeng Tan 
5537a7eb2aSJianfeng Tan 	state.index = queue_sel;
5637a7eb2aSJianfeng Tan 	state.num = vring->num;
5733d24d65SJianfeng Tan 	dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state);
5837a7eb2aSJianfeng Tan 
59be7a4707SJianfeng Tan 	state.index = queue_sel;
6037a7eb2aSJianfeng Tan 	state.num = 0; /* no reservation */
61*34f3966cSYuanhan Liu 	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
62*34f3966cSYuanhan Liu 		state.num |= (1 << 15);
6333d24d65SJianfeng Tan 	dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state);
6437a7eb2aSJianfeng Tan 
6533d24d65SJianfeng Tan 	dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr);
6637a7eb2aSJianfeng Tan 
6737a7eb2aSJianfeng Tan 	/* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes
6837a7eb2aSJianfeng Tan 	 * lastly because vhost depends on this msg to judge if
6937a7eb2aSJianfeng Tan 	 * virtio is ready.
7037a7eb2aSJianfeng Tan 	 */
7157ae79a7SJianfeng Tan 	file.index = queue_sel;
72e6e7ad8bSJianfeng Tan 	file.fd = dev->kickfds[queue_sel];
7333d24d65SJianfeng Tan 	dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file);
7437a7eb2aSJianfeng Tan 
7537a7eb2aSJianfeng Tan 	return 0;
7637a7eb2aSJianfeng Tan }
7737a7eb2aSJianfeng Tan 
7857ae79a7SJianfeng Tan static int
7957ae79a7SJianfeng Tan virtio_user_queue_setup(struct virtio_user_dev *dev,
8057ae79a7SJianfeng Tan 			int (*fn)(struct virtio_user_dev *, uint32_t))
8157ae79a7SJianfeng Tan {
8257ae79a7SJianfeng Tan 	uint32_t i, queue_sel;
8357ae79a7SJianfeng Tan 
8457ae79a7SJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i) {
8557ae79a7SJianfeng Tan 		queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
8657ae79a7SJianfeng Tan 		if (fn(dev, queue_sel) < 0) {
8757ae79a7SJianfeng Tan 			PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i);
8857ae79a7SJianfeng Tan 			return -1;
8957ae79a7SJianfeng Tan 		}
9057ae79a7SJianfeng Tan 	}
9157ae79a7SJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i) {
9257ae79a7SJianfeng Tan 		queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
9357ae79a7SJianfeng Tan 		if (fn(dev, queue_sel) < 0) {
9457ae79a7SJianfeng Tan 			PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i);
9557ae79a7SJianfeng Tan 			return -1;
9657ae79a7SJianfeng Tan 		}
9757ae79a7SJianfeng Tan 	}
9857ae79a7SJianfeng Tan 
9957ae79a7SJianfeng Tan 	return 0;
10057ae79a7SJianfeng Tan }
10157ae79a7SJianfeng Tan 
/*
 * Tell whether 'path' names a socket.
 *
 * Returns the result of S_ISSOCK() on the file's mode, or 0 when the path
 * cannot be stat()ed.
 */
int
is_vhost_user_by_type(const char *path)
{
	struct stat st;

	return stat(path, &st) == -1 ? 0 : S_ISSOCK(st.st_mode);
}
112bd8f50a4SZhiyong Yang 
113bd8f50a4SZhiyong Yang int
11437a7eb2aSJianfeng Tan virtio_user_start_device(struct virtio_user_dev *dev)
11537a7eb2aSJianfeng Tan {
1167ff26957STiwei Bie 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
11737a7eb2aSJianfeng Tan 	uint64_t features;
11837a7eb2aSJianfeng Tan 	int ret;
11937a7eb2aSJianfeng Tan 
1207ff26957STiwei Bie 	/*
1217ff26957STiwei Bie 	 * XXX workaround!
1227ff26957STiwei Bie 	 *
1237ff26957STiwei Bie 	 * We need to make sure that the locks will be
1247ff26957STiwei Bie 	 * taken in the correct order to avoid deadlocks.
1257ff26957STiwei Bie 	 *
1267ff26957STiwei Bie 	 * Before releasing this lock, this thread should
1277ff26957STiwei Bie 	 * not trigger any memory hotplug events.
1287ff26957STiwei Bie 	 *
1297ff26957STiwei Bie 	 * This is a temporary workaround, and should be
1307ff26957STiwei Bie 	 * replaced when we get proper supports from the
1317ff26957STiwei Bie 	 * memory subsystem in the future.
1327ff26957STiwei Bie 	 */
1337ff26957STiwei Bie 	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
13412ecb2f6SMaxime Coquelin 	pthread_mutex_lock(&dev->mutex);
13512ecb2f6SMaxime Coquelin 
136bd8f50a4SZhiyong Yang 	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
13712ecb2f6SMaxime Coquelin 		goto error;
138bd8f50a4SZhiyong Yang 
13957ae79a7SJianfeng Tan 	/* Step 0: tell vhost to create queues */
14057ae79a7SJianfeng Tan 	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
14137a7eb2aSJianfeng Tan 		goto error;
14237a7eb2aSJianfeng Tan 
14333d24d65SJianfeng Tan 	/* Step 1: set features */
14437a7eb2aSJianfeng Tan 	features = dev->features;
14533d24d65SJianfeng Tan 	/* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
14637a7eb2aSJianfeng Tan 	features &= ~(1ull << VIRTIO_NET_F_MAC);
14733d24d65SJianfeng Tan 	/* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */
148142678d4SJianfeng Tan 	features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
14935c4f855SJianfeng Tan 	features &= ~(1ull << VIRTIO_NET_F_STATUS);
15033d24d65SJianfeng Tan 	ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
15137a7eb2aSJianfeng Tan 	if (ret < 0)
15237a7eb2aSJianfeng Tan 		goto error;
15337a7eb2aSJianfeng Tan 	PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);
15437a7eb2aSJianfeng Tan 
15557ae79a7SJianfeng Tan 	/* Step 2: share memory regions */
15633d24d65SJianfeng Tan 	ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
15757ae79a7SJianfeng Tan 	if (ret < 0)
15857ae79a7SJianfeng Tan 		goto error;
15957ae79a7SJianfeng Tan 
16057ae79a7SJianfeng Tan 	/* Step 3: kick queues */
16157ae79a7SJianfeng Tan 	if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0)
16257ae79a7SJianfeng Tan 		goto error;
16357ae79a7SJianfeng Tan 
16457ae79a7SJianfeng Tan 	/* Step 4: enable queues
16557ae79a7SJianfeng Tan 	 * we enable the 1st queue pair by default.
16657ae79a7SJianfeng Tan 	 */
16733d24d65SJianfeng Tan 	dev->ops->enable_qp(dev, 0, 1);
16857ae79a7SJianfeng Tan 
16912ecb2f6SMaxime Coquelin 	dev->started = true;
17012ecb2f6SMaxime Coquelin 	pthread_mutex_unlock(&dev->mutex);
1717ff26957STiwei Bie 	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
17212ecb2f6SMaxime Coquelin 
17337a7eb2aSJianfeng Tan 	return 0;
17437a7eb2aSJianfeng Tan error:
17512ecb2f6SMaxime Coquelin 	pthread_mutex_unlock(&dev->mutex);
1767ff26957STiwei Bie 	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
17737a7eb2aSJianfeng Tan 	/* TODO: free resource here or caller to check */
17837a7eb2aSJianfeng Tan 	return -1;
17937a7eb2aSJianfeng Tan }
18037a7eb2aSJianfeng Tan 
18137a7eb2aSJianfeng Tan int virtio_user_stop_device(struct virtio_user_dev *dev)
18237a7eb2aSJianfeng Tan {
18374dc6746STiwei Bie 	struct vhost_vring_state state;
184c12a26eeSJianfeng Tan 	uint32_t i;
18574dc6746STiwei Bie 	int error = 0;
186c12a26eeSJianfeng Tan 
18712ecb2f6SMaxime Coquelin 	pthread_mutex_lock(&dev->mutex);
188f457e900STiwei Bie 	if (!dev->started)
189f457e900STiwei Bie 		goto out;
190f457e900STiwei Bie 
191c12a26eeSJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i)
19233d24d65SJianfeng Tan 		dev->ops->enable_qp(dev, i, 0);
193c12a26eeSJianfeng Tan 
19474dc6746STiwei Bie 	/* Stop the backend. */
19574dc6746STiwei Bie 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
19674dc6746STiwei Bie 		state.index = i;
19774dc6746STiwei Bie 		if (dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE,
19874dc6746STiwei Bie 					   &state) < 0) {
19974dc6746STiwei Bie 			PMD_DRV_LOG(ERR, "get_vring_base failed, index=%u\n",
20074dc6746STiwei Bie 				    i);
20174dc6746STiwei Bie 			error = -1;
20274dc6746STiwei Bie 			goto out;
2030d6a8752SJianfeng Tan 		}
20474dc6746STiwei Bie 	}
20574dc6746STiwei Bie 
20612ecb2f6SMaxime Coquelin 	dev->started = false;
207f457e900STiwei Bie out:
20812ecb2f6SMaxime Coquelin 	pthread_mutex_unlock(&dev->mutex);
2090d6a8752SJianfeng Tan 
21074dc6746STiwei Bie 	return error;
21137a7eb2aSJianfeng Tan }
21237a7eb2aSJianfeng Tan 
21337a7eb2aSJianfeng Tan static inline void
21437a7eb2aSJianfeng Tan parse_mac(struct virtio_user_dev *dev, const char *mac)
21537a7eb2aSJianfeng Tan {
21637a7eb2aSJianfeng Tan 	int i, r;
21737a7eb2aSJianfeng Tan 	uint32_t tmp[ETHER_ADDR_LEN];
21837a7eb2aSJianfeng Tan 
21937a7eb2aSJianfeng Tan 	if (!mac)
22037a7eb2aSJianfeng Tan 		return;
22137a7eb2aSJianfeng Tan 
22237a7eb2aSJianfeng Tan 	r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0],
22337a7eb2aSJianfeng Tan 			&tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
22437a7eb2aSJianfeng Tan 	if (r == ETHER_ADDR_LEN) {
22537a7eb2aSJianfeng Tan 		for (i = 0; i < ETHER_ADDR_LEN; ++i)
22637a7eb2aSJianfeng Tan 			dev->mac_addr[i] = (uint8_t)tmp[i];
22737a7eb2aSJianfeng Tan 		dev->mac_specified = 1;
22837a7eb2aSJianfeng Tan 	} else {
22937a7eb2aSJianfeng Tan 		/* ignore the wrong mac, use random mac */
23037a7eb2aSJianfeng Tan 		PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac);
23137a7eb2aSJianfeng Tan 	}
23237a7eb2aSJianfeng Tan }
23337a7eb2aSJianfeng Tan 
23433d24d65SJianfeng Tan static int
235e6e7ad8bSJianfeng Tan virtio_user_dev_init_notify(struct virtio_user_dev *dev)
23633d24d65SJianfeng Tan {
237e6e7ad8bSJianfeng Tan 	uint32_t i, j;
238e6e7ad8bSJianfeng Tan 	int callfd;
239e6e7ad8bSJianfeng Tan 	int kickfd;
24033d24d65SJianfeng Tan 
2412269b9aeSWenfeng Liu 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; ++i) {
242e6e7ad8bSJianfeng Tan 		if (i >= dev->max_queue_pairs * 2) {
24333d24d65SJianfeng Tan 			dev->kickfds[i] = -1;
24433d24d65SJianfeng Tan 			dev->callfds[i] = -1;
245e6e7ad8bSJianfeng Tan 			continue;
24633d24d65SJianfeng Tan 		}
24733d24d65SJianfeng Tan 
248e6e7ad8bSJianfeng Tan 		/* May use invalid flag, but some backend uses kickfd and
249e6e7ad8bSJianfeng Tan 		 * callfd as criteria to judge if dev is alive. so finally we
250e6e7ad8bSJianfeng Tan 		 * use real event_fd.
251e6e7ad8bSJianfeng Tan 		 */
252e6e7ad8bSJianfeng Tan 		callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
253e6e7ad8bSJianfeng Tan 		if (callfd < 0) {
254e6e7ad8bSJianfeng Tan 			PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno));
255e6e7ad8bSJianfeng Tan 			break;
256e6e7ad8bSJianfeng Tan 		}
257e6e7ad8bSJianfeng Tan 		kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
258e6e7ad8bSJianfeng Tan 		if (kickfd < 0) {
259e6e7ad8bSJianfeng Tan 			PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno));
260e6e7ad8bSJianfeng Tan 			break;
261e6e7ad8bSJianfeng Tan 		}
262e6e7ad8bSJianfeng Tan 		dev->callfds[i] = callfd;
263e6e7ad8bSJianfeng Tan 		dev->kickfds[i] = kickfd;
264e6e7ad8bSJianfeng Tan 	}
265e6e7ad8bSJianfeng Tan 
266e6e7ad8bSJianfeng Tan 	if (i < VIRTIO_MAX_VIRTQUEUES) {
267e6e7ad8bSJianfeng Tan 		for (j = 0; j <= i; ++j) {
268e6e7ad8bSJianfeng Tan 			close(dev->callfds[j]);
269e6e7ad8bSJianfeng Tan 			close(dev->kickfds[j]);
270e6e7ad8bSJianfeng Tan 		}
271e6e7ad8bSJianfeng Tan 
272e6e7ad8bSJianfeng Tan 		return -1;
273e6e7ad8bSJianfeng Tan 	}
274e6e7ad8bSJianfeng Tan 
275e6e7ad8bSJianfeng Tan 	return 0;
276e6e7ad8bSJianfeng Tan }
277e6e7ad8bSJianfeng Tan 
278e6e7ad8bSJianfeng Tan static int
2793d4fb6fdSJianfeng Tan virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
2803d4fb6fdSJianfeng Tan {
2813d4fb6fdSJianfeng Tan 	uint32_t i;
2823d4fb6fdSJianfeng Tan 	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
2833d4fb6fdSJianfeng Tan 
2843d4fb6fdSJianfeng Tan 	if (!eth_dev->intr_handle) {
2853d4fb6fdSJianfeng Tan 		eth_dev->intr_handle = malloc(sizeof(*eth_dev->intr_handle));
2863d4fb6fdSJianfeng Tan 		if (!eth_dev->intr_handle) {
2873d4fb6fdSJianfeng Tan 			PMD_DRV_LOG(ERR, "fail to allocate intr_handle");
2883d4fb6fdSJianfeng Tan 			return -1;
2893d4fb6fdSJianfeng Tan 		}
2903d4fb6fdSJianfeng Tan 		memset(eth_dev->intr_handle, 0, sizeof(*eth_dev->intr_handle));
2913d4fb6fdSJianfeng Tan 	}
2923d4fb6fdSJianfeng Tan 
2933d4fb6fdSJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i)
2943d4fb6fdSJianfeng Tan 		eth_dev->intr_handle->efds[i] = dev->callfds[i];
2953d4fb6fdSJianfeng Tan 	eth_dev->intr_handle->nb_efd = dev->max_queue_pairs;
2963d4fb6fdSJianfeng Tan 	eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1;
2973d4fb6fdSJianfeng Tan 	eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
29829906b97SJingjing Wu 	/* For virtio vdev, no need to read counter for clean */
29929906b97SJingjing Wu 	eth_dev->intr_handle->efd_counter_size = 0;
300eae6e70cSJianfeng Tan 	eth_dev->intr_handle->fd = -1;
301cc4690e9SJianfeng Tan 	if (dev->vhostfd >= 0)
302cc4690e9SJianfeng Tan 		eth_dev->intr_handle->fd = dev->vhostfd;
303bd8f50a4SZhiyong Yang 	else if (dev->is_server)
304bd8f50a4SZhiyong Yang 		eth_dev->intr_handle->fd = dev->listenfd;
3053d4fb6fdSJianfeng Tan 
3063d4fb6fdSJianfeng Tan 	return 0;
3073d4fb6fdSJianfeng Tan }
3083d4fb6fdSJianfeng Tan 
30912ecb2f6SMaxime Coquelin static void
31012ecb2f6SMaxime Coquelin virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
31112ecb2f6SMaxime Coquelin 						 const void *addr __rte_unused,
31212ecb2f6SMaxime Coquelin 						 size_t len __rte_unused,
31312ecb2f6SMaxime Coquelin 						 void *arg)
31412ecb2f6SMaxime Coquelin {
31512ecb2f6SMaxime Coquelin 	struct virtio_user_dev *dev = arg;
316f32c7c9dSAnatoly Burakov 	struct rte_memseg_list *msl;
31712ecb2f6SMaxime Coquelin 	uint16_t i;
31812ecb2f6SMaxime Coquelin 
319f32c7c9dSAnatoly Burakov 	/* ignore externally allocated memory */
320f32c7c9dSAnatoly Burakov 	msl = rte_mem_virt2memseg_list(addr);
321f32c7c9dSAnatoly Burakov 	if (msl->external)
322f32c7c9dSAnatoly Burakov 		return;
323f32c7c9dSAnatoly Burakov 
32412ecb2f6SMaxime Coquelin 	pthread_mutex_lock(&dev->mutex);
32512ecb2f6SMaxime Coquelin 
32612ecb2f6SMaxime Coquelin 	if (dev->started == false)
32712ecb2f6SMaxime Coquelin 		goto exit;
32812ecb2f6SMaxime Coquelin 
32912ecb2f6SMaxime Coquelin 	/* Step 1: pause the active queues */
33012ecb2f6SMaxime Coquelin 	for (i = 0; i < dev->queue_pairs; i++)
33112ecb2f6SMaxime Coquelin 		dev->ops->enable_qp(dev, i, 0);
33212ecb2f6SMaxime Coquelin 
33312ecb2f6SMaxime Coquelin 	/* Step 2: update memory regions */
33412ecb2f6SMaxime Coquelin 	dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
33512ecb2f6SMaxime Coquelin 
33612ecb2f6SMaxime Coquelin 	/* Step 3: resume the active queues */
33712ecb2f6SMaxime Coquelin 	for (i = 0; i < dev->queue_pairs; i++)
33812ecb2f6SMaxime Coquelin 		dev->ops->enable_qp(dev, i, 1);
33912ecb2f6SMaxime Coquelin 
34012ecb2f6SMaxime Coquelin exit:
34112ecb2f6SMaxime Coquelin 	pthread_mutex_unlock(&dev->mutex);
34212ecb2f6SMaxime Coquelin }
34312ecb2f6SMaxime Coquelin 
3443d4fb6fdSJianfeng Tan static int
345e6e7ad8bSJianfeng Tan virtio_user_dev_setup(struct virtio_user_dev *dev)
346e6e7ad8bSJianfeng Tan {
347e6e7ad8bSJianfeng Tan 	uint32_t q;
348e6e7ad8bSJianfeng Tan 
349e6e7ad8bSJianfeng Tan 	dev->vhostfd = -1;
350e3b43481SJianfeng Tan 	dev->vhostfds = NULL;
351e3b43481SJianfeng Tan 	dev->tapfds = NULL;
352e3b43481SJianfeng Tan 
353bd8f50a4SZhiyong Yang 	if (dev->is_server) {
354bd8f50a4SZhiyong Yang 		if (access(dev->path, F_OK) == 0 &&
355bd8f50a4SZhiyong Yang 		    !is_vhost_user_by_type(dev->path)) {
356bd8f50a4SZhiyong Yang 			PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
357bd8f50a4SZhiyong Yang 			return -1;
358bd8f50a4SZhiyong Yang 		}
359520dd992SFerruh Yigit 		dev->ops = &virtio_ops_user;
360bd8f50a4SZhiyong Yang 	} else {
36133d24d65SJianfeng Tan 		if (is_vhost_user_by_type(dev->path)) {
362520dd992SFerruh Yigit 			dev->ops = &virtio_ops_user;
363e3b43481SJianfeng Tan 		} else {
364520dd992SFerruh Yigit 			dev->ops = &virtio_ops_kernel;
365e3b43481SJianfeng Tan 
366bd8f50a4SZhiyong Yang 			dev->vhostfds = malloc(dev->max_queue_pairs *
367bd8f50a4SZhiyong Yang 					       sizeof(int));
368bd8f50a4SZhiyong Yang 			dev->tapfds = malloc(dev->max_queue_pairs *
369bd8f50a4SZhiyong Yang 					     sizeof(int));
370e3b43481SJianfeng Tan 			if (!dev->vhostfds || !dev->tapfds) {
371e3b43481SJianfeng Tan 				PMD_INIT_LOG(ERR, "Failed to malloc");
372e3b43481SJianfeng Tan 				return -1;
37333d24d65SJianfeng Tan 			}
37433d24d65SJianfeng Tan 
375e3b43481SJianfeng Tan 			for (q = 0; q < dev->max_queue_pairs; ++q) {
376e3b43481SJianfeng Tan 				dev->vhostfds[q] = -1;
377e3b43481SJianfeng Tan 				dev->tapfds[q] = -1;
378e3b43481SJianfeng Tan 			}
379e3b43481SJianfeng Tan 		}
380bd8f50a4SZhiyong Yang 	}
381e3b43481SJianfeng Tan 
382cc4690e9SJianfeng Tan 	if (dev->ops->setup(dev) < 0)
383cc4690e9SJianfeng Tan 		return -1;
384cc4690e9SJianfeng Tan 
385cc4690e9SJianfeng Tan 	if (virtio_user_dev_init_notify(dev) < 0)
386cc4690e9SJianfeng Tan 		return -1;
387cc4690e9SJianfeng Tan 
388cc4690e9SJianfeng Tan 	if (virtio_user_fill_intr_handle(dev) < 0)
389cc4690e9SJianfeng Tan 		return -1;
390cc4690e9SJianfeng Tan 
391cc4690e9SJianfeng Tan 	return 0;
39233d24d65SJianfeng Tan }
39333d24d65SJianfeng Tan 
/* Mask of the feature bits kept when filtering what the vhost backend
 * offers.
 */
#define VIRTIO_USER_SUPPORTED_FEATURES			\
	((1ULL << VIRTIO_NET_F_MAC)		|	\
	 (1ULL << VIRTIO_NET_F_STATUS)		|	\
	 (1ULL << VIRTIO_NET_F_MQ)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)	|	\
	 (1ULL << VIRTIO_NET_F_CTRL_VQ)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_RX)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_VLAN)	|	\
	 (1ULL << VIRTIO_NET_F_CSUM)		|	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO4)	|	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6)	|	\
	 (1ULL << VIRTIO_NET_F_MRG_RXBUF)	|	\
	 (1ULL << VIRTIO_RING_F_INDIRECT_DESC)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_CSUM)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6)	|	\
	 (1ULL << VIRTIO_F_IN_ORDER)		|	\
	 (1ULL << VIRTIO_F_VERSION_1)		|	\
	 (1ULL << VIRTIO_F_RING_PACKED))
414bed3b24cSJianfeng Tan 
41537a7eb2aSJianfeng Tan int
41637a7eb2aSJianfeng Tan virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
417488ed97aSMarvin Liu 		     int cq, int queue_size, const char *mac, char **ifname,
418*34f3966cSYuanhan Liu 		     int mrg_rxbuf, int in_order, int packed_vq)
41937a7eb2aSJianfeng Tan {
42012ecb2f6SMaxime Coquelin 	pthread_mutex_init(&dev->mutex, NULL);
421542849c0SJianfeng Tan 	snprintf(dev->path, PATH_MAX, "%s", path);
42212ecb2f6SMaxime Coquelin 	dev->started = 0;
42337a7eb2aSJianfeng Tan 	dev->max_queue_pairs = queues;
42437a7eb2aSJianfeng Tan 	dev->queue_pairs = 1; /* mq disabled by default */
42537a7eb2aSJianfeng Tan 	dev->queue_size = queue_size;
42637a7eb2aSJianfeng Tan 	dev->mac_specified = 0;
427bb97d2ddSTiwei Bie 	dev->frontend_features = 0;
428bd9568f3STiwei Bie 	dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
42937a7eb2aSJianfeng Tan 	parse_mac(dev, mac);
43037a7eb2aSJianfeng Tan 
4314214a1b4SWenfeng Liu 	if (*ifname) {
4324214a1b4SWenfeng Liu 		dev->ifname = *ifname;
4334214a1b4SWenfeng Liu 		*ifname = NULL;
4344214a1b4SWenfeng Liu 	}
4354214a1b4SWenfeng Liu 
43633d24d65SJianfeng Tan 	if (virtio_user_dev_setup(dev) < 0) {
43737a7eb2aSJianfeng Tan 		PMD_INIT_LOG(ERR, "backend set up fails");
43837a7eb2aSJianfeng Tan 		return -1;
43937a7eb2aSJianfeng Tan 	}
440bce7e905SJianfeng Tan 
4416e1e5904SJiayu Hu 	if (!dev->is_server) {
442bd8f50a4SZhiyong Yang 		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
443bd8f50a4SZhiyong Yang 					   NULL) < 0) {
444bd8f50a4SZhiyong Yang 			PMD_INIT_LOG(ERR, "set_owner fails: %s",
445bd8f50a4SZhiyong Yang 				     strerror(errno));
44637a7eb2aSJianfeng Tan 			return -1;
44737a7eb2aSJianfeng Tan 		}
44837a7eb2aSJianfeng Tan 
44933d24d65SJianfeng Tan 		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
450142678d4SJianfeng Tan 					   &dev->device_features) < 0) {
451bd8f50a4SZhiyong Yang 			PMD_INIT_LOG(ERR, "get_features failed: %s",
452bd8f50a4SZhiyong Yang 				     strerror(errno));
45337a7eb2aSJianfeng Tan 			return -1;
45437a7eb2aSJianfeng Tan 		}
455bd8f50a4SZhiyong Yang 	} else {
456bd8f50a4SZhiyong Yang 		/* We just pretend vhost-user can support all these features.
457bd8f50a4SZhiyong Yang 		 * Note that this could be problematic that if some feature is
458bd8f50a4SZhiyong Yang 		 * negotiated but not supported by the vhost-user which comes
459bd8f50a4SZhiyong Yang 		 * later.
460bd8f50a4SZhiyong Yang 		 */
461bd8f50a4SZhiyong Yang 		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
462bd8f50a4SZhiyong Yang 	}
463bd8f50a4SZhiyong Yang 
464bd9568f3STiwei Bie 	if (!mrg_rxbuf)
465488ed97aSMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
466488ed97aSMarvin Liu 
467bd9568f3STiwei Bie 	if (!in_order)
468488ed97aSMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
469488ed97aSMarvin Liu 
470*34f3966cSYuanhan Liu 	if (packed_vq)
471*34f3966cSYuanhan Liu 		dev->device_features |= (1ull << VIRTIO_F_RING_PACKED);
472bd9568f3STiwei Bie 	else
473*34f3966cSYuanhan Liu 		dev->device_features &= ~(1ull << VIRTIO_F_RING_PACKED);
474*34f3966cSYuanhan Liu 
475*34f3966cSYuanhan Liu 	if (dev->mac_specified) {
476*34f3966cSYuanhan Liu 		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
477*34f3966cSYuanhan Liu 	} else {
478*34f3966cSYuanhan Liu 		dev->device_features &= ~(1ull << VIRTIO_NET_F_MAC);
4797c66ff61SMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
480*34f3966cSYuanhan Liu 	}
481f9b9d1a5SJianfeng Tan 
482142678d4SJianfeng Tan 	if (cq) {
483142678d4SJianfeng Tan 		/* device does not really need to know anything about CQ,
484142678d4SJianfeng Tan 		 * so if necessary, we just claim to support CQ
485f9b9d1a5SJianfeng Tan 		 */
486bb97d2ddSTiwei Bie 		dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
487142678d4SJianfeng Tan 	} else {
4887c66ff61SMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
489bd9568f3STiwei Bie 		/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
4907c66ff61SMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
4917c66ff61SMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
4927c66ff61SMarvin Liu 		dev->unsupported_features |=
4937c66ff61SMarvin Liu 			(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
4947c66ff61SMarvin Liu 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
4957c66ff61SMarvin Liu 		dev->unsupported_features |=
4967c66ff61SMarvin Liu 			(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
497f9b9d1a5SJianfeng Tan 	}
498f9b9d1a5SJianfeng Tan 
49935c4f855SJianfeng Tan 	/* The backend will not report this feature, we add it explicitly */
5004bf9e26dSJianfeng Tan 	if (is_vhost_user_by_type(dev->path))
501bb97d2ddSTiwei Bie 		dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
50235c4f855SJianfeng Tan 
503bd9568f3STiwei Bie 	/*
504bd9568f3STiwei Bie 	 * Device features =
505bd9568f3STiwei Bie 	 *     (frontend_features | backend_features) & ~unsupported_features;
506bd9568f3STiwei Bie 	 */
507bb97d2ddSTiwei Bie 	dev->device_features |= dev->frontend_features;
508bd9568f3STiwei Bie 	dev->device_features &= ~dev->unsupported_features;
509bed3b24cSJianfeng Tan 
51012ecb2f6SMaxime Coquelin 	if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
51112ecb2f6SMaxime Coquelin 				virtio_user_mem_event_cb, dev)) {
51288e5469fSXiao Wang 		if (rte_errno != ENOTSUP) {
51388e5469fSXiao Wang 			PMD_INIT_LOG(ERR, "Failed to register mem event"
51488e5469fSXiao Wang 					" callback\n");
51512ecb2f6SMaxime Coquelin 			return -1;
51612ecb2f6SMaxime Coquelin 		}
51788e5469fSXiao Wang 	}
51812ecb2f6SMaxime Coquelin 
51937a7eb2aSJianfeng Tan 	return 0;
52037a7eb2aSJianfeng Tan }
52137a7eb2aSJianfeng Tan 
52237a7eb2aSJianfeng Tan void
52337a7eb2aSJianfeng Tan virtio_user_dev_uninit(struct virtio_user_dev *dev)
52437a7eb2aSJianfeng Tan {
525e3b43481SJianfeng Tan 	uint32_t i;
526e3b43481SJianfeng Tan 
527e3b43481SJianfeng Tan 	virtio_user_stop_device(dev);
528e3b43481SJianfeng Tan 
52912ecb2f6SMaxime Coquelin 	rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);
53012ecb2f6SMaxime Coquelin 
531e6e7ad8bSJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
532e6e7ad8bSJianfeng Tan 		close(dev->callfds[i]);
533e6e7ad8bSJianfeng Tan 		close(dev->kickfds[i]);
534e6e7ad8bSJianfeng Tan 	}
535e6e7ad8bSJianfeng Tan 
53637a7eb2aSJianfeng Tan 	close(dev->vhostfd);
537e3b43481SJianfeng Tan 
538bd8f50a4SZhiyong Yang 	if (dev->is_server && dev->listenfd >= 0) {
539bd8f50a4SZhiyong Yang 		close(dev->listenfd);
540bd8f50a4SZhiyong Yang 		dev->listenfd = -1;
541bd8f50a4SZhiyong Yang 	}
542bd8f50a4SZhiyong Yang 
543e3b43481SJianfeng Tan 	if (dev->vhostfds) {
544e3b43481SJianfeng Tan 		for (i = 0; i < dev->max_queue_pairs; ++i)
545e3b43481SJianfeng Tan 			close(dev->vhostfds[i]);
546e3b43481SJianfeng Tan 		free(dev->vhostfds);
547e3b43481SJianfeng Tan 		free(dev->tapfds);
548e3b43481SJianfeng Tan 	}
5494214a1b4SWenfeng Liu 
5504214a1b4SWenfeng Liu 	free(dev->ifname);
551bd8f50a4SZhiyong Yang 
552bd8f50a4SZhiyong Yang 	if (dev->is_server)
553bd8f50a4SZhiyong Yang 		unlink(dev->path);
55437a7eb2aSJianfeng Tan }
555f9b9d1a5SJianfeng Tan 
556201a4165SZhiyong Yang uint8_t
557f9b9d1a5SJianfeng Tan virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
558f9b9d1a5SJianfeng Tan {
559f9b9d1a5SJianfeng Tan 	uint16_t i;
560f9b9d1a5SJianfeng Tan 	uint8_t ret = 0;
561f9b9d1a5SJianfeng Tan 
562f9b9d1a5SJianfeng Tan 	if (q_pairs > dev->max_queue_pairs) {
563f9b9d1a5SJianfeng Tan 		PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported",
564f9b9d1a5SJianfeng Tan 			     q_pairs, dev->max_queue_pairs);
565f9b9d1a5SJianfeng Tan 		return -1;
566f9b9d1a5SJianfeng Tan 	}
567f9b9d1a5SJianfeng Tan 
568201a4165SZhiyong Yang 	/* Server mode can't enable queue pairs if vhostfd is invalid,
569201a4165SZhiyong Yang 	 * always return 0 in this case.
570201a4165SZhiyong Yang 	 */
57121b90f79STiwei Bie 	if (!dev->is_server || dev->vhostfd >= 0) {
572f9b9d1a5SJianfeng Tan 		for (i = 0; i < q_pairs; ++i)
57333d24d65SJianfeng Tan 			ret |= dev->ops->enable_qp(dev, i, 1);
574f9b9d1a5SJianfeng Tan 		for (i = q_pairs; i < dev->max_queue_pairs; ++i)
57533d24d65SJianfeng Tan 			ret |= dev->ops->enable_qp(dev, i, 0);
576201a4165SZhiyong Yang 	}
577f9b9d1a5SJianfeng Tan 	dev->queue_pairs = q_pairs;
578f9b9d1a5SJianfeng Tan 
579f9b9d1a5SJianfeng Tan 	return ret;
580f9b9d1a5SJianfeng Tan }
581f9b9d1a5SJianfeng Tan 
582f9b9d1a5SJianfeng Tan static uint32_t
583f9b9d1a5SJianfeng Tan virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
584f9b9d1a5SJianfeng Tan 			    uint16_t idx_hdr)
585f9b9d1a5SJianfeng Tan {
586f9b9d1a5SJianfeng Tan 	struct virtio_net_ctrl_hdr *hdr;
587f9b9d1a5SJianfeng Tan 	virtio_net_ctrl_ack status = ~0;
588f9b9d1a5SJianfeng Tan 	uint16_t i, idx_data, idx_status;
589f9b9d1a5SJianfeng Tan 	uint32_t n_descs = 0;
590f9b9d1a5SJianfeng Tan 
591f9b9d1a5SJianfeng Tan 	/* locate desc for header, data, and status */
592f9b9d1a5SJianfeng Tan 	idx_data = vring->desc[idx_hdr].next;
593f9b9d1a5SJianfeng Tan 	n_descs++;
594f9b9d1a5SJianfeng Tan 
595f9b9d1a5SJianfeng Tan 	i = idx_data;
596f9b9d1a5SJianfeng Tan 	while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
597f9b9d1a5SJianfeng Tan 		i = vring->desc[i].next;
598f9b9d1a5SJianfeng Tan 		n_descs++;
599f9b9d1a5SJianfeng Tan 	}
600f9b9d1a5SJianfeng Tan 
601f9b9d1a5SJianfeng Tan 	/* locate desc for status */
602f9b9d1a5SJianfeng Tan 	idx_status = i;
603f9b9d1a5SJianfeng Tan 	n_descs++;
604f9b9d1a5SJianfeng Tan 
605f9b9d1a5SJianfeng Tan 	hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
606f9b9d1a5SJianfeng Tan 	if (hdr->class == VIRTIO_NET_CTRL_MQ &&
607f9b9d1a5SJianfeng Tan 	    hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
608f9b9d1a5SJianfeng Tan 		uint16_t queues;
609f9b9d1a5SJianfeng Tan 
610f9b9d1a5SJianfeng Tan 		queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
611f9b9d1a5SJianfeng Tan 		status = virtio_user_handle_mq(dev, queues);
612f9b9d1a5SJianfeng Tan 	}
613f9b9d1a5SJianfeng Tan 
614f9b9d1a5SJianfeng Tan 	/* Update status */
615f9b9d1a5SJianfeng Tan 	*(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
616f9b9d1a5SJianfeng Tan 
617f9b9d1a5SJianfeng Tan 	return n_descs;
618f9b9d1a5SJianfeng Tan }
619f9b9d1a5SJianfeng Tan 
620f9b9d1a5SJianfeng Tan void
621f9b9d1a5SJianfeng Tan virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
622f9b9d1a5SJianfeng Tan {
623f9b9d1a5SJianfeng Tan 	uint16_t avail_idx, desc_idx;
624f9b9d1a5SJianfeng Tan 	struct vring_used_elem *uep;
625f9b9d1a5SJianfeng Tan 	uint32_t n_descs;
626f9b9d1a5SJianfeng Tan 	struct vring *vring = &dev->vrings[queue_idx];
627f9b9d1a5SJianfeng Tan 
628f9b9d1a5SJianfeng Tan 	/* Consume avail ring, using used ring idx as first one */
629f9b9d1a5SJianfeng Tan 	while (vring->used->idx != vring->avail->idx) {
630f9b9d1a5SJianfeng Tan 		avail_idx = (vring->used->idx) & (vring->num - 1);
631f9b9d1a5SJianfeng Tan 		desc_idx = vring->avail->ring[avail_idx];
632f9b9d1a5SJianfeng Tan 
633f9b9d1a5SJianfeng Tan 		n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
634f9b9d1a5SJianfeng Tan 
635f9b9d1a5SJianfeng Tan 		/* Update used ring */
636f9b9d1a5SJianfeng Tan 		uep = &vring->used->ring[avail_idx];
637f9b9d1a5SJianfeng Tan 		uep->id = avail_idx;
638f9b9d1a5SJianfeng Tan 		uep->len = n_descs;
639f9b9d1a5SJianfeng Tan 
640f9b9d1a5SJianfeng Tan 		vring->used->idx++;
641f9b9d1a5SJianfeng Tan 	}
642f9b9d1a5SJianfeng Tan }
643