xref: /dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c (revision 1e9057a97b3d5045f17412f19a9dc578a8067cc9)
1e3b43481SJianfeng Tan /*-
2e3b43481SJianfeng Tan  *   BSD LICENSE
3e3b43481SJianfeng Tan  *
4e3b43481SJianfeng Tan  *   Copyright(c) 2016 Intel Corporation. All rights reserved.
5e3b43481SJianfeng Tan  *   All rights reserved.
6e3b43481SJianfeng Tan  *
7e3b43481SJianfeng Tan  *   Redistribution and use in source and binary forms, with or without
8e3b43481SJianfeng Tan  *   modification, are permitted provided that the following conditions
9e3b43481SJianfeng Tan  *   are met:
10e3b43481SJianfeng Tan  *
11e3b43481SJianfeng Tan  *     * Redistributions of source code must retain the above copyright
12e3b43481SJianfeng Tan  *       notice, this list of conditions and the following disclaimer.
13e3b43481SJianfeng Tan  *     * Redistributions in binary form must reproduce the above copyright
14e3b43481SJianfeng Tan  *       notice, this list of conditions and the following disclaimer in
15e3b43481SJianfeng Tan  *       the documentation and/or other materials provided with the
16e3b43481SJianfeng Tan  *       distribution.
17e3b43481SJianfeng Tan  *     * Neither the name of Intel Corporation nor the names of its
18e3b43481SJianfeng Tan  *       contributors may be used to endorse or promote products derived
19e3b43481SJianfeng Tan  *       from this software without specific prior written permission.
20e3b43481SJianfeng Tan  *
21e3b43481SJianfeng Tan  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22e3b43481SJianfeng Tan  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23e3b43481SJianfeng Tan  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24e3b43481SJianfeng Tan  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25e3b43481SJianfeng Tan  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26e3b43481SJianfeng Tan  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27e3b43481SJianfeng Tan  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28e3b43481SJianfeng Tan  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29e3b43481SJianfeng Tan  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30e3b43481SJianfeng Tan  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31e3b43481SJianfeng Tan  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32e3b43481SJianfeng Tan  */
33e3b43481SJianfeng Tan 
34e3b43481SJianfeng Tan #include <sys/types.h>
35e3b43481SJianfeng Tan #include <sys/stat.h>
36e3b43481SJianfeng Tan #include <fcntl.h>
37e3b43481SJianfeng Tan #include <unistd.h>
38e3b43481SJianfeng Tan 
39e3b43481SJianfeng Tan #include <rte_memory.h>
40e3b43481SJianfeng Tan #include <rte_eal_memconfig.h>
41e3b43481SJianfeng Tan 
42e3b43481SJianfeng Tan #include "vhost.h"
43e3b43481SJianfeng Tan #include "virtio_user_dev.h"
44e3b43481SJianfeng Tan #include "vhost_kernel_tap.h"
45e3b43481SJianfeng Tan 
46e3b43481SJianfeng Tan struct vhost_memory_kernel {
47e3b43481SJianfeng Tan 	uint32_t nregions;
48e3b43481SJianfeng Tan 	uint32_t padding;
49e3b43481SJianfeng Tan 	struct vhost_memory_region regions[0];
50e3b43481SJianfeng Tan };
51e3b43481SJianfeng Tan 
/*
 * vhost kernel ioctls.
 *
 * All requests share the VHOST_VIRTIO magic; the second argument is the
 * request number and the third the type of the ioctl argument.
 */
#define VHOST_VIRTIO 0xAF

/* ownership */
#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)

/* feature bits: same request number, opposite transfer direction */
#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)

/* memory table and logging */
#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)

/* per-vring state; SET/GET_VRING_BASE share request number 0x12 */
#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)

/* per-vring file descriptors */
#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)

/* vhost-net backend attachment */
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
69e3b43481SJianfeng Tan 
/* Capacity of the memory table; stays at 64 unless sysfs supplies a value. */
static uint64_t max_regions = 64;

/*
 * Refresh max_regions from the vhost module's max_mem_regions parameter.
 *
 * The current value is kept when the sysfs file cannot be opened or read,
 * or when its content is not a usable count: no digits, zero, or a value
 * that does not fit in 32 bits (the width of the table's region counter).
 */
static void
get_vhost_kernel_max_regions(void)
{
	char buf[20] = {'\0'};
	char *end = NULL;
	unsigned long long val;
	ssize_t len;
	int fd;

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	/* read at most sizeof(buf) - 1 bytes so buf stays NUL-terminated */
	len = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (len <= 0)
		return;

	val = strtoull(buf, &end, 10);
	/*
	 * end == buf: nothing was parsed. The upper bound also rejects the
	 * ULLONG_MAX that strtoull() returns on overflow or for "-1".
	 */
	if (end == buf || val == 0 || val > UINT32_MAX)
		return;

	max_regions = val;
}
87e3b43481SJianfeng Tan 
88e3b43481SJianfeng Tan static uint64_t vhost_req_user_to_kernel[] = {
89e3b43481SJianfeng Tan 	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
90e3b43481SJianfeng Tan 	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
91e3b43481SJianfeng Tan 	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
92e3b43481SJianfeng Tan 	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
93e3b43481SJianfeng Tan 	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
94e3b43481SJianfeng Tan 	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
95e3b43481SJianfeng Tan 	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
96e3b43481SJianfeng Tan 	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
97e3b43481SJianfeng Tan 	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
98e3b43481SJianfeng Tan 	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
99e3b43481SJianfeng Tan 	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
100e3b43481SJianfeng Tan };
101e3b43481SJianfeng Tan 
102e3b43481SJianfeng Tan /* By default, vhost kernel module allows 64 regions, but DPDK allows
103e3b43481SJianfeng Tan  * 256 segments. As a relief, below function merges those virtually
104e3b43481SJianfeng Tan  * adjacent memsegs into one region.
105e3b43481SJianfeng Tan  */
106e3b43481SJianfeng Tan static struct vhost_memory_kernel *
107e3b43481SJianfeng Tan prepare_vhost_memory_kernel(void)
108e3b43481SJianfeng Tan {
109e3b43481SJianfeng Tan 	uint32_t i, j, k = 0;
110e3b43481SJianfeng Tan 	struct rte_memseg *seg;
111e3b43481SJianfeng Tan 	struct vhost_memory_region *mr;
112e3b43481SJianfeng Tan 	struct vhost_memory_kernel *vm;
113e3b43481SJianfeng Tan 
114e3b43481SJianfeng Tan 	vm = malloc(sizeof(struct vhost_memory_kernel) +
115e3b43481SJianfeng Tan 		    max_regions *
116e3b43481SJianfeng Tan 		    sizeof(struct vhost_memory_region));
117*1e9057a9SJianfeng Tan 	if (!vm)
118*1e9057a9SJianfeng Tan 		return NULL;
119e3b43481SJianfeng Tan 
120e3b43481SJianfeng Tan 	for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
121e3b43481SJianfeng Tan 		seg = &rte_eal_get_configuration()->mem_config->memseg[i];
122e3b43481SJianfeng Tan 		if (!seg->addr)
123e3b43481SJianfeng Tan 			break;
124e3b43481SJianfeng Tan 
125e3b43481SJianfeng Tan 		int new_region = 1;
126e3b43481SJianfeng Tan 
127e3b43481SJianfeng Tan 		for (j = 0; j < k; ++j) {
128e3b43481SJianfeng Tan 			mr = &vm->regions[j];
129e3b43481SJianfeng Tan 
130e3b43481SJianfeng Tan 			if (mr->userspace_addr + mr->memory_size ==
131e3b43481SJianfeng Tan 			    (uint64_t)(uintptr_t)seg->addr) {
132e3b43481SJianfeng Tan 				mr->memory_size += seg->len;
133e3b43481SJianfeng Tan 				new_region = 0;
134e3b43481SJianfeng Tan 				break;
135e3b43481SJianfeng Tan 			}
136e3b43481SJianfeng Tan 
137e3b43481SJianfeng Tan 			if ((uint64_t)(uintptr_t)seg->addr + seg->len ==
138e3b43481SJianfeng Tan 			    mr->userspace_addr) {
139e3b43481SJianfeng Tan 				mr->guest_phys_addr =
140e3b43481SJianfeng Tan 					(uint64_t)(uintptr_t)seg->addr;
141e3b43481SJianfeng Tan 				mr->userspace_addr =
142e3b43481SJianfeng Tan 					(uint64_t)(uintptr_t)seg->addr;
143e3b43481SJianfeng Tan 				mr->memory_size += seg->len;
144e3b43481SJianfeng Tan 				new_region = 0;
145e3b43481SJianfeng Tan 				break;
146e3b43481SJianfeng Tan 			}
147e3b43481SJianfeng Tan 		}
148e3b43481SJianfeng Tan 
149e3b43481SJianfeng Tan 		if (new_region == 0)
150e3b43481SJianfeng Tan 			continue;
151e3b43481SJianfeng Tan 
152e3b43481SJianfeng Tan 		mr = &vm->regions[k++];
153e3b43481SJianfeng Tan 		/* use vaddr here! */
154e3b43481SJianfeng Tan 		mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr;
155e3b43481SJianfeng Tan 		mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr;
156e3b43481SJianfeng Tan 		mr->memory_size = seg->len;
157e3b43481SJianfeng Tan 		mr->mmap_offset = 0;
158e3b43481SJianfeng Tan 
159e3b43481SJianfeng Tan 		if (k >= max_regions) {
160e3b43481SJianfeng Tan 			free(vm);
161e3b43481SJianfeng Tan 			return NULL;
162e3b43481SJianfeng Tan 		}
163e3b43481SJianfeng Tan 	}
164e3b43481SJianfeng Tan 
165e3b43481SJianfeng Tan 	vm->nregions = k;
166e3b43481SJianfeng Tan 	vm->padding = 0;
167e3b43481SJianfeng Tan 	return vm;
168e3b43481SJianfeng Tan }
169e3b43481SJianfeng Tan 
/* Guest-side offload feature bits.
 * With these features, vhost kernel does not need to do the checksum and
 * TSO itself; that information is passed to virtio_user through the
 * virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK (		\
	(1ULL << VIRTIO_NET_F_GUEST_CSUM) |		\
	(1ULL << VIRTIO_NET_F_GUEST_TSO4) |		\
	(1ULL << VIRTIO_NET_F_GUEST_TSO6) |		\
	(1ULL << VIRTIO_NET_F_GUEST_ECN)  |		\
	(1ULL << VIRTIO_NET_F_GUEST_UFO))
1795e97e420SJianfeng Tan 
/* Host-side offload feature bits.
 * With these features, for flows going from virtio_user to vhost kernel:
 * (1) if a flow goes up through the kernel networking stack, the checksum
 * does not need to be verified, which can save CPU cycles;
 * (2) if a flow goes through a Linux bridge and out of an interface
 * (kernel driver), checksum and TSO will be done by GSO in the kernel or
 * even offloaded to the real physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK (		\
	(1ULL << VIRTIO_NET_F_HOST_TSO4) |		\
	(1ULL << VIRTIO_NET_F_HOST_TSO6) |		\
	(1ULL << VIRTIO_NET_F_CSUM))
1915e97e420SJianfeng Tan 
192e3b43481SJianfeng Tan static int
193be7a4707SJianfeng Tan tap_supporte_mq(void)
194be7a4707SJianfeng Tan {
195be7a4707SJianfeng Tan 	int tapfd;
196be7a4707SJianfeng Tan 	unsigned int tap_features;
197be7a4707SJianfeng Tan 
198be7a4707SJianfeng Tan 	tapfd = open(PATH_NET_TUN, O_RDWR);
199be7a4707SJianfeng Tan 	if (tapfd < 0) {
200be7a4707SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to open %s: %s",
201be7a4707SJianfeng Tan 			    PATH_NET_TUN, strerror(errno));
202be7a4707SJianfeng Tan 		return -1;
203be7a4707SJianfeng Tan 	}
204be7a4707SJianfeng Tan 
205be7a4707SJianfeng Tan 	if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
206be7a4707SJianfeng Tan 		PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
207be7a4707SJianfeng Tan 		close(tapfd);
208be7a4707SJianfeng Tan 		return -1;
209be7a4707SJianfeng Tan 	}
210be7a4707SJianfeng Tan 
211be7a4707SJianfeng Tan 	close(tapfd);
212be7a4707SJianfeng Tan 	return tap_features & IFF_MULTI_QUEUE;
213be7a4707SJianfeng Tan }
214be7a4707SJianfeng Tan 
215be7a4707SJianfeng Tan static int
216e3b43481SJianfeng Tan vhost_kernel_ioctl(struct virtio_user_dev *dev,
217e3b43481SJianfeng Tan 		   enum vhost_user_request req,
218e3b43481SJianfeng Tan 		   void *arg)
219e3b43481SJianfeng Tan {
220e3b43481SJianfeng Tan 	int ret = -1;
221e3b43481SJianfeng Tan 	unsigned int i;
222e3b43481SJianfeng Tan 	uint64_t req_kernel;
223e3b43481SJianfeng Tan 	struct vhost_memory_kernel *vm = NULL;
224be7a4707SJianfeng Tan 	int vhostfd;
225be7a4707SJianfeng Tan 	unsigned int queue_sel;
226e3b43481SJianfeng Tan 
227e3b43481SJianfeng Tan 	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
228e3b43481SJianfeng Tan 
229e3b43481SJianfeng Tan 	req_kernel = vhost_req_user_to_kernel[req];
230e3b43481SJianfeng Tan 
231e3b43481SJianfeng Tan 	if (req_kernel == VHOST_SET_MEM_TABLE) {
232e3b43481SJianfeng Tan 		vm = prepare_vhost_memory_kernel();
233e3b43481SJianfeng Tan 		if (!vm)
234e3b43481SJianfeng Tan 			return -1;
235e3b43481SJianfeng Tan 		arg = (void *)vm;
236e3b43481SJianfeng Tan 	}
237e3b43481SJianfeng Tan 
2385e97e420SJianfeng Tan 	if (req_kernel == VHOST_SET_FEATURES) {
239e3b43481SJianfeng Tan 		/* We don't need memory protection here */
240e3b43481SJianfeng Tan 		*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
241e3b43481SJianfeng Tan 
2425e97e420SJianfeng Tan 		/* VHOST kernel does not know about below flags */
2435e97e420SJianfeng Tan 		*(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
2445e97e420SJianfeng Tan 		*(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
245be7a4707SJianfeng Tan 
246be7a4707SJianfeng Tan 		*(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
2475e97e420SJianfeng Tan 	}
2485e97e420SJianfeng Tan 
249be7a4707SJianfeng Tan 	switch (req_kernel) {
250be7a4707SJianfeng Tan 	case VHOST_SET_VRING_NUM:
251be7a4707SJianfeng Tan 	case VHOST_SET_VRING_ADDR:
252be7a4707SJianfeng Tan 	case VHOST_SET_VRING_BASE:
253be7a4707SJianfeng Tan 	case VHOST_GET_VRING_BASE:
254be7a4707SJianfeng Tan 	case VHOST_SET_VRING_KICK:
255be7a4707SJianfeng Tan 	case VHOST_SET_VRING_CALL:
256be7a4707SJianfeng Tan 		queue_sel = *(unsigned int *)arg;
257be7a4707SJianfeng Tan 		vhostfd = dev->vhostfds[queue_sel / 2];
258be7a4707SJianfeng Tan 		*(unsigned int *)arg = queue_sel % 2;
259be7a4707SJianfeng Tan 		PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
260be7a4707SJianfeng Tan 			    vhostfd, *(unsigned int *)arg);
261be7a4707SJianfeng Tan 		break;
262be7a4707SJianfeng Tan 	default:
263be7a4707SJianfeng Tan 		vhostfd = -1;
264be7a4707SJianfeng Tan 	}
265be7a4707SJianfeng Tan 	if (vhostfd == -1) {
266e3b43481SJianfeng Tan 		for (i = 0; i < dev->max_queue_pairs; ++i) {
267e3b43481SJianfeng Tan 			if (dev->vhostfds[i] < 0)
268e3b43481SJianfeng Tan 				continue;
269e3b43481SJianfeng Tan 
270e3b43481SJianfeng Tan 			ret = ioctl(dev->vhostfds[i], req_kernel, arg);
271e3b43481SJianfeng Tan 			if (ret < 0)
272e3b43481SJianfeng Tan 				break;
273e3b43481SJianfeng Tan 		}
274be7a4707SJianfeng Tan 	} else {
275be7a4707SJianfeng Tan 		ret = ioctl(vhostfd, req_kernel, arg);
276be7a4707SJianfeng Tan 	}
277e3b43481SJianfeng Tan 
2785e97e420SJianfeng Tan 	if (!ret && req_kernel == VHOST_GET_FEATURES) {
2795e97e420SJianfeng Tan 		/* with tap as the backend, all these features are supported
2805e97e420SJianfeng Tan 		 * but not claimed by vhost-net, so we add them back when
2815e97e420SJianfeng Tan 		 * reporting to upper layer.
2825e97e420SJianfeng Tan 		 */
2835e97e420SJianfeng Tan 		*((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
2845e97e420SJianfeng Tan 		*((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
285be7a4707SJianfeng Tan 
286be7a4707SJianfeng Tan 		/* vhost_kernel will not declare this feature, but it does
287be7a4707SJianfeng Tan 		 * support multi-queue.
288be7a4707SJianfeng Tan 		 */
289be7a4707SJianfeng Tan 		if (tap_supporte_mq())
290be7a4707SJianfeng Tan 			*(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ);
2915e97e420SJianfeng Tan 	}
2925e97e420SJianfeng Tan 
293e3b43481SJianfeng Tan 	if (vm)
294e3b43481SJianfeng Tan 		free(vm);
295e3b43481SJianfeng Tan 
296e3b43481SJianfeng Tan 	if (ret < 0)
297e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "%s failed: %s",
298e3b43481SJianfeng Tan 			    vhost_msg_strings[req], strerror(errno));
299e3b43481SJianfeng Tan 
300e3b43481SJianfeng Tan 	return ret;
301e3b43481SJianfeng Tan }
302e3b43481SJianfeng Tan 
303e3b43481SJianfeng Tan /**
304e3b43481SJianfeng Tan  * Set up environment to talk with a vhost kernel backend.
305e3b43481SJianfeng Tan  *
306e3b43481SJianfeng Tan  * @return
307e3b43481SJianfeng Tan  *   - (-1) if fail to set up;
308e3b43481SJianfeng Tan  *   - (>=0) if successful.
309e3b43481SJianfeng Tan  */
310e3b43481SJianfeng Tan static int
311e3b43481SJianfeng Tan vhost_kernel_setup(struct virtio_user_dev *dev)
312e3b43481SJianfeng Tan {
313e3b43481SJianfeng Tan 	int vhostfd;
314e3b43481SJianfeng Tan 	uint32_t i;
315e3b43481SJianfeng Tan 
316e3b43481SJianfeng Tan 	get_vhost_kernel_max_regions();
317e3b43481SJianfeng Tan 
318e3b43481SJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i) {
319e3b43481SJianfeng Tan 		vhostfd = open(dev->path, O_RDWR);
320e3b43481SJianfeng Tan 		if (vhostfd < 0) {
321e3b43481SJianfeng Tan 			PMD_DRV_LOG(ERR, "fail to open %s, %s",
322e3b43481SJianfeng Tan 				    dev->path, strerror(errno));
323e3b43481SJianfeng Tan 			return -1;
324e3b43481SJianfeng Tan 		}
325e3b43481SJianfeng Tan 
326e3b43481SJianfeng Tan 		dev->vhostfds[i] = vhostfd;
327e3b43481SJianfeng Tan 	}
328e3b43481SJianfeng Tan 
329e3b43481SJianfeng Tan 	return 0;
330e3b43481SJianfeng Tan }
331e3b43481SJianfeng Tan 
332e3b43481SJianfeng Tan static int
333e3b43481SJianfeng Tan vhost_kernel_set_backend(int vhostfd, int tapfd)
334e3b43481SJianfeng Tan {
335e3b43481SJianfeng Tan 	struct vhost_vring_file f;
336e3b43481SJianfeng Tan 
337e3b43481SJianfeng Tan 	f.fd = tapfd;
338e3b43481SJianfeng Tan 	f.index = 0;
339e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
340e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
341e3b43481SJianfeng Tan 				strerror(errno));
342e3b43481SJianfeng Tan 		return -1;
343e3b43481SJianfeng Tan 	}
344e3b43481SJianfeng Tan 
345e3b43481SJianfeng Tan 	f.index = 1;
346e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
347e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
348e3b43481SJianfeng Tan 				strerror(errno));
349e3b43481SJianfeng Tan 		return -1;
350e3b43481SJianfeng Tan 	}
351e3b43481SJianfeng Tan 
352e3b43481SJianfeng Tan 	return 0;
353e3b43481SJianfeng Tan }
354e3b43481SJianfeng Tan 
355e3b43481SJianfeng Tan static int
356e3b43481SJianfeng Tan vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
357e3b43481SJianfeng Tan 			       uint16_t pair_idx,
358e3b43481SJianfeng Tan 			       int enable)
359e3b43481SJianfeng Tan {
360e3b43481SJianfeng Tan 	int hdr_size;
361e3b43481SJianfeng Tan 	int vhostfd;
362e3b43481SJianfeng Tan 	int tapfd;
363be7a4707SJianfeng Tan 	int req_mq = (dev->max_queue_pairs > 1);
364e3b43481SJianfeng Tan 
365e3b43481SJianfeng Tan 	vhostfd = dev->vhostfds[pair_idx];
366e3b43481SJianfeng Tan 
367e3b43481SJianfeng Tan 	if (!enable) {
368e3b43481SJianfeng Tan 		if (dev->tapfds[pair_idx]) {
369e3b43481SJianfeng Tan 			close(dev->tapfds[pair_idx]);
370e3b43481SJianfeng Tan 			dev->tapfds[pair_idx] = -1;
371e3b43481SJianfeng Tan 		}
372e3b43481SJianfeng Tan 		return vhost_kernel_set_backend(vhostfd, -1);
373e3b43481SJianfeng Tan 	} else if (dev->tapfds[pair_idx] >= 0) {
374e3b43481SJianfeng Tan 		return 0;
375e3b43481SJianfeng Tan 	}
376e3b43481SJianfeng Tan 
377e3b43481SJianfeng Tan 	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
378e3b43481SJianfeng Tan 	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
379e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
380e3b43481SJianfeng Tan 	else
381e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr);
382e3b43481SJianfeng Tan 
383be7a4707SJianfeng Tan 	tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq);
384e3b43481SJianfeng Tan 	if (tapfd < 0) {
385e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
386e3b43481SJianfeng Tan 		return -1;
387e3b43481SJianfeng Tan 	}
388e3b43481SJianfeng Tan 
389e3b43481SJianfeng Tan 	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
390e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
391e3b43481SJianfeng Tan 		close(tapfd);
392e3b43481SJianfeng Tan 		return -1;
393e3b43481SJianfeng Tan 	}
394e3b43481SJianfeng Tan 
395e3b43481SJianfeng Tan 	dev->tapfds[pair_idx] = tapfd;
396e3b43481SJianfeng Tan 	return 0;
397e3b43481SJianfeng Tan }
398e3b43481SJianfeng Tan 
399e3b43481SJianfeng Tan struct virtio_user_backend_ops ops_kernel = {
400e3b43481SJianfeng Tan 	.setup = vhost_kernel_setup,
401e3b43481SJianfeng Tan 	.send_request = vhost_kernel_ioctl,
402e3b43481SJianfeng Tan 	.enable_qp = vhost_kernel_enable_queue_pair
403e3b43481SJianfeng Tan };
404