xref: /dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c (revision b01e1e9a0d3e77e91dba61d90bed4ef6252e5ae0)
15566a3e3SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
25566a3e3SBruce Richardson  * Copyright(c) 2016 Intel Corporation
3e3b43481SJianfeng Tan  */
4e3b43481SJianfeng Tan 
5e3b43481SJianfeng Tan #include <sys/types.h>
6e3b43481SJianfeng Tan #include <sys/stat.h>
7e3b43481SJianfeng Tan #include <fcntl.h>
8e3b43481SJianfeng Tan #include <unistd.h>
906856cabSMaxime Coquelin #include <errno.h>
10e3b43481SJianfeng Tan 
11e3b43481SJianfeng Tan #include <rte_memory.h>
12e3b43481SJianfeng Tan 
13e3b43481SJianfeng Tan #include "vhost.h"
14e3b43481SJianfeng Tan #include "virtio_user_dev.h"
15e3b43481SJianfeng Tan #include "vhost_kernel_tap.h"
16e3b43481SJianfeng Tan 
/*
 * Backend-private state of a vhost-kernel virtio-user device.
 *
 * Both arrays are allocated by vhost_kernel_setup() with one slot per
 * queue pair; a slot holding -1 has no open file descriptor.
 */
struct vhost_kernel_data {
	int *vhostfds;	/* vhost device fds, opened from dev->path */
	int *tapfds;	/* TAP fds, opened when a queue pair is first enabled */
};
2186388a3aSMaxime Coquelin 
22e3b43481SJianfeng Tan struct vhost_memory_kernel {
23e3b43481SJianfeng Tan 	uint32_t nregions;
24e3b43481SJianfeng Tan 	uint32_t padding;
25e3b43481SJianfeng Tan 	struct vhost_memory_region regions[0];
26e3b43481SJianfeng Tan };
27e3b43481SJianfeng Tan 
/*
 * vhost kernel ioctl request codes.
 *
 * All requests share the VHOST_VIRTIO magic. Request numbers below 0x10
 * act on the whole device, 0x10..0x22 act on one vring, and 0x30 is
 * specific to vhost-net.
 */
#define VHOST_VIRTIO		0xAF

/* device-wide requests */
#define VHOST_GET_FEATURES	_IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES	_IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER		_IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER	_IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_MEM_TABLE	_IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE	_IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD	_IOW(VHOST_VIRTIO, 0x07, int)

/* per-vring requests */
#define VHOST_SET_VRING_NUM	_IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR	_IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE	_IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE	_IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK	_IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL	_IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR	_IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)

/* vhost-net request */
#define VHOST_NET_SET_BACKEND	_IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
45e3b43481SJianfeng Tan 
/*
 * Guest-side offload features. When these are negotiated, vhost kernel does
 * not have to perform checksum and TSO itself: the information is handed to
 * virtio_user through the virtio net header instead.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_ECN) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
55cc0151b3SMaxime Coquelin 
/*
 * Host-side offload features. With these negotiated, for traffic flowing
 * from virtio_user to vhost kernel:
 * (1) if the flow goes up through the kernel networking stack, the checksum
 *     does not need to be verified, which saves CPU cycles;
 * (2) if the flow goes through a Linux bridge and out of an interface
 *     (kernel driver), checksum and TSO are done by GSO in the kernel or
 *     even offloaded to the real physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_CSUM))
67cc0151b3SMaxime Coquelin 
/* Upper bound on memory regions per table; 64 unless sysfs says otherwise. */
static uint64_t max_regions = 64;

/*
 * Refresh max_regions from the vhost module parameter exposed in sysfs.
 *
 * The current value is kept when the file cannot be opened or read, or when
 * its content does not start with a decimal number that fits in an
 * unsigned long long. Previously an unparsable value silently turned the
 * limit into 0.
 */
static void
get_vhost_kernel_max_regions(void)
{
	int fd;
	ssize_t nread;
	char buf[20] = {'\0'};
	char *end = NULL;
	unsigned long long val;

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	/* Read at most sizeof(buf) - 1 bytes so buf stays NUL-terminated. */
	nread = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (nread <= 0)
		return;

	errno = 0;
	val = strtoull(buf, &end, 10);
	/* No digits consumed, or value out of range: keep the previous limit. */
	if (end == buf || errno == ERANGE)
		return;

	max_regions = val;
}
85e3b43481SJianfeng Tan 
8606856cabSMaxime Coquelin static int
8706856cabSMaxime Coquelin vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
8806856cabSMaxime Coquelin {
8906856cabSMaxime Coquelin 	int ret;
9006856cabSMaxime Coquelin 
9106856cabSMaxime Coquelin 	ret = ioctl(fd, request, arg);
9206856cabSMaxime Coquelin 	if (ret) {
9306856cabSMaxime Coquelin 		PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
9406856cabSMaxime Coquelin 				request, strerror(errno));
9506856cabSMaxime Coquelin 		return -1;
9606856cabSMaxime Coquelin 	}
9706856cabSMaxime Coquelin 
9806856cabSMaxime Coquelin 	return 0;
9906856cabSMaxime Coquelin }
10006856cabSMaxime Coquelin 
10106856cabSMaxime Coquelin static int
10206856cabSMaxime Coquelin vhost_kernel_set_owner(struct virtio_user_dev *dev)
10306856cabSMaxime Coquelin {
1043c503b24SThierry Herbelot 	int ret;
1053c503b24SThierry Herbelot 	uint32_t i;
10686388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
10786388a3aSMaxime Coquelin 
1083c503b24SThierry Herbelot 	for (i = 0; i < dev->max_queue_pairs; ++i) {
1093c503b24SThierry Herbelot 		if (data->vhostfds[i] < 0)
1103c503b24SThierry Herbelot 			continue;
1113c503b24SThierry Herbelot 
1123c503b24SThierry Herbelot 		ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_OWNER, NULL);
1133c503b24SThierry Herbelot 		if (ret < 0)
1143c503b24SThierry Herbelot 			return ret;
1153c503b24SThierry Herbelot 	}
1163c503b24SThierry Herbelot 
1173c503b24SThierry Herbelot 	return 0;
11806856cabSMaxime Coquelin }
11906856cabSMaxime Coquelin 
120cc0151b3SMaxime Coquelin static int
121cc0151b3SMaxime Coquelin vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
122cc0151b3SMaxime Coquelin {
123cc0151b3SMaxime Coquelin 	int ret;
124cc0151b3SMaxime Coquelin 	unsigned int tap_features;
12586388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
126cc0151b3SMaxime Coquelin 
12786388a3aSMaxime Coquelin 	ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features);
128cc0151b3SMaxime Coquelin 	if (ret < 0) {
129cc0151b3SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to get features");
130cc0151b3SMaxime Coquelin 		return -1;
131cc0151b3SMaxime Coquelin 	}
132cc0151b3SMaxime Coquelin 
133cc0151b3SMaxime Coquelin 	ret = tap_support_features(&tap_features);
134cc0151b3SMaxime Coquelin 	if (ret < 0) {
135cc0151b3SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to get TAP features");
136cc0151b3SMaxime Coquelin 		return -1;
137cc0151b3SMaxime Coquelin 	}
138cc0151b3SMaxime Coquelin 
139cc0151b3SMaxime Coquelin 	/* with tap as the backend, all these features are supported
140cc0151b3SMaxime Coquelin 	 * but not claimed by vhost-net, so we add them back when
141cc0151b3SMaxime Coquelin 	 * reporting to upper layer.
142cc0151b3SMaxime Coquelin 	 */
143cc0151b3SMaxime Coquelin 	if (tap_features & IFF_VNET_HDR) {
144cc0151b3SMaxime Coquelin 		*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
145cc0151b3SMaxime Coquelin 		*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
146cc0151b3SMaxime Coquelin 	}
147cc0151b3SMaxime Coquelin 
148cc0151b3SMaxime Coquelin 	/* vhost_kernel will not declare this feature, but it does
149cc0151b3SMaxime Coquelin 	 * support multi-queue.
150cc0151b3SMaxime Coquelin 	 */
151cc0151b3SMaxime Coquelin 	if (tap_features & IFF_MULTI_QUEUE)
152cc0151b3SMaxime Coquelin 		*features |= (1ull << VIRTIO_NET_F_MQ);
153cc0151b3SMaxime Coquelin 
154cc0151b3SMaxime Coquelin 	return 0;
155cc0151b3SMaxime Coquelin }
156cc0151b3SMaxime Coquelin 
157cc0151b3SMaxime Coquelin static int
158cc0151b3SMaxime Coquelin vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
159cc0151b3SMaxime Coquelin {
16086388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
16186388a3aSMaxime Coquelin 
162cc0151b3SMaxime Coquelin 	/* We don't need memory protection here */
163cc0151b3SMaxime Coquelin 	features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
164cc0151b3SMaxime Coquelin 	/* VHOST kernel does not know about below flags */
165cc0151b3SMaxime Coquelin 	features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
166cc0151b3SMaxime Coquelin 	features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
167cc0151b3SMaxime Coquelin 	features &= ~(1ULL << VIRTIO_NET_F_MQ);
168cc0151b3SMaxime Coquelin 
16986388a3aSMaxime Coquelin 	return vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_FEATURES, &features);
170cc0151b3SMaxime Coquelin }
171cc0151b3SMaxime Coquelin 
172746c346dSAnatoly Burakov static int
17375728654STiwei Bie add_memseg_list(const struct rte_memseg_list *msl, void *arg)
174746c346dSAnatoly Burakov {
17575728654STiwei Bie 	struct vhost_memory_kernel *vm = arg;
176746c346dSAnatoly Burakov 	struct vhost_memory_region *mr;
177746c346dSAnatoly Burakov 	void *start_addr;
17875728654STiwei Bie 	uint64_t len;
179746c346dSAnatoly Burakov 
1805282bb1cSAnatoly Burakov 	if (msl->external)
1815282bb1cSAnatoly Burakov 		return 0;
1825282bb1cSAnatoly Burakov 
18375728654STiwei Bie 	if (vm->nregions >= max_regions)
184746c346dSAnatoly Burakov 		return -1;
185746c346dSAnatoly Burakov 
18675728654STiwei Bie 	start_addr = msl->base_va;
18775728654STiwei Bie 	len = msl->page_sz * msl->memseg_arr.len;
18875728654STiwei Bie 
18975728654STiwei Bie 	mr = &vm->regions[vm->nregions++];
190746c346dSAnatoly Burakov 
191746c346dSAnatoly Burakov 	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
192746c346dSAnatoly Burakov 	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
193746c346dSAnatoly Burakov 	mr->memory_size = len;
19475728654STiwei Bie 	mr->mmap_offset = 0; /* flags_padding */
19575728654STiwei Bie 
19675728654STiwei Bie 	PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
19775728654STiwei Bie 			vm->nregions - 1, start_addr, len);
198746c346dSAnatoly Burakov 
199746c346dSAnatoly Burakov 	return 0;
200746c346dSAnatoly Burakov }
201746c346dSAnatoly Burakov 
20275728654STiwei Bie /* By default, vhost kernel module allows 64 regions, but DPDK may
20375728654STiwei Bie  * have much more memory regions. Below function will treat each
20475728654STiwei Bie  * contiguous memory space reserved by DPDK as one region.
205e3b43481SJianfeng Tan  */
206539d910cSMaxime Coquelin static int
207539d910cSMaxime Coquelin vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
208e3b43481SJianfeng Tan {
209*b01e1e9aSThierry Herbelot 	uint32_t i;
21086388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
211e3b43481SJianfeng Tan 	struct vhost_memory_kernel *vm;
212539d910cSMaxime Coquelin 	int ret;
213e3b43481SJianfeng Tan 
214e3b43481SJianfeng Tan 	vm = malloc(sizeof(struct vhost_memory_kernel) +
215e3b43481SJianfeng Tan 			max_regions *
216e3b43481SJianfeng Tan 			sizeof(struct vhost_memory_region));
2171e9057a9SJianfeng Tan 	if (!vm)
218539d910cSMaxime Coquelin 		goto err;
219e3b43481SJianfeng Tan 
22075728654STiwei Bie 	vm->nregions = 0;
22175728654STiwei Bie 	vm->padding = 0;
222e3b43481SJianfeng Tan 
2237ff26957STiwei Bie 	/*
2247ff26957STiwei Bie 	 * The memory lock has already been taken by memory subsystem
2257ff26957STiwei Bie 	 * or virtio_user_start_device().
2267ff26957STiwei Bie 	 */
227539d910cSMaxime Coquelin 	ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
228539d910cSMaxime Coquelin 	if (ret < 0)
229539d910cSMaxime Coquelin 		goto err_free;
230539d910cSMaxime Coquelin 
231*b01e1e9aSThierry Herbelot 	for (i = 0; i < dev->max_queue_pairs; ++i) {
232*b01e1e9aSThierry Herbelot 		if (data->vhostfds[i] < 0)
233*b01e1e9aSThierry Herbelot 			continue;
234*b01e1e9aSThierry Herbelot 
235*b01e1e9aSThierry Herbelot 		ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_MEM_TABLE, vm);
236539d910cSMaxime Coquelin 		if (ret < 0)
237539d910cSMaxime Coquelin 			goto err_free;
238*b01e1e9aSThierry Herbelot 	}
239539d910cSMaxime Coquelin 
240e3b43481SJianfeng Tan 	free(vm);
241539d910cSMaxime Coquelin 
242539d910cSMaxime Coquelin 	return 0;
243539d910cSMaxime Coquelin err_free:
244539d910cSMaxime Coquelin 	free(vm);
245539d910cSMaxime Coquelin err:
246539d910cSMaxime Coquelin 	PMD_DRV_LOG(ERR, "Failed to set memory table");
247539d910cSMaxime Coquelin 	return -1;
248e3b43481SJianfeng Tan }
249e3b43481SJianfeng Tan 
250ab9098d2SMaxime Coquelin static int
251ab9098d2SMaxime Coquelin vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state)
252ab9098d2SMaxime Coquelin {
253ab9098d2SMaxime Coquelin 	int ret, fd;
254ab9098d2SMaxime Coquelin 	unsigned int index = state->index;
25586388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
256ab9098d2SMaxime Coquelin 
257ab9098d2SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
25886388a3aSMaxime Coquelin 	fd = data->vhostfds[state->index / 2];
259ab9098d2SMaxime Coquelin 	state->index %= 2;
260ab9098d2SMaxime Coquelin 
261ab9098d2SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, req, state);
262ab9098d2SMaxime Coquelin 	if (ret < 0) {
263ab9098d2SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req);
264ab9098d2SMaxime Coquelin 		return -1;
265ab9098d2SMaxime Coquelin 	}
266ab9098d2SMaxime Coquelin 
267ab9098d2SMaxime Coquelin 	/* restore index back to queue index */
268ab9098d2SMaxime Coquelin 	state->index = index;
269ab9098d2SMaxime Coquelin 
270ab9098d2SMaxime Coquelin 	return 0;
271ab9098d2SMaxime Coquelin }
272ab9098d2SMaxime Coquelin 
273ab9098d2SMaxime Coquelin static int
274ab9098d2SMaxime Coquelin vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
275ab9098d2SMaxime Coquelin {
276ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state);
277ab9098d2SMaxime Coquelin }
278ab9098d2SMaxime Coquelin 
279ab9098d2SMaxime Coquelin static int
280ab9098d2SMaxime Coquelin vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
281ab9098d2SMaxime Coquelin {
282ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state);
283ab9098d2SMaxime Coquelin }
284ab9098d2SMaxime Coquelin 
285ab9098d2SMaxime Coquelin static int
286ab9098d2SMaxime Coquelin vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
287ab9098d2SMaxime Coquelin {
288ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state);
289ab9098d2SMaxime Coquelin }
290ab9098d2SMaxime Coquelin 
291ce399c36SMaxime Coquelin static int
292ce399c36SMaxime Coquelin vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req,
293ce399c36SMaxime Coquelin 		struct vhost_vring_file *file)
294ce399c36SMaxime Coquelin {
295ce399c36SMaxime Coquelin 	int ret, fd;
296ce399c36SMaxime Coquelin 	unsigned int index = file->index;
29786388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
298ce399c36SMaxime Coquelin 
299ce399c36SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
30086388a3aSMaxime Coquelin 	fd = data->vhostfds[file->index / 2];
301ce399c36SMaxime Coquelin 	file->index %= 2;
302ce399c36SMaxime Coquelin 
303ce399c36SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, req, file);
304ce399c36SMaxime Coquelin 	if (ret < 0) {
305ce399c36SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req);
306ce399c36SMaxime Coquelin 		return -1;
307ce399c36SMaxime Coquelin 	}
308ce399c36SMaxime Coquelin 
309ce399c36SMaxime Coquelin 	/* restore index back to queue index */
310ce399c36SMaxime Coquelin 	file->index = index;
311ce399c36SMaxime Coquelin 
312ce399c36SMaxime Coquelin 	return 0;
313ce399c36SMaxime Coquelin }
314ce399c36SMaxime Coquelin 
315ce399c36SMaxime Coquelin static int
316ce399c36SMaxime Coquelin vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
317ce399c36SMaxime Coquelin {
318ce399c36SMaxime Coquelin 	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file);
319ce399c36SMaxime Coquelin }
320ce399c36SMaxime Coquelin 
321ce399c36SMaxime Coquelin static int
322ce399c36SMaxime Coquelin vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
323ce399c36SMaxime Coquelin {
324ce399c36SMaxime Coquelin 	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file);
325ce399c36SMaxime Coquelin }
326ce399c36SMaxime Coquelin 
327dc65db73SMaxime Coquelin static int
328dc65db73SMaxime Coquelin vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
329dc65db73SMaxime Coquelin {
330dc65db73SMaxime Coquelin 	int ret, fd;
331dc65db73SMaxime Coquelin 	unsigned int index = addr->index;
33286388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
333dc65db73SMaxime Coquelin 
334dc65db73SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
33586388a3aSMaxime Coquelin 	fd = data->vhostfds[addr->index / 2];
336dc65db73SMaxime Coquelin 	addr->index %= 2;
337dc65db73SMaxime Coquelin 
338dc65db73SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr);
339dc65db73SMaxime Coquelin 	if (ret < 0) {
340dc65db73SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring address");
341dc65db73SMaxime Coquelin 		return -1;
342dc65db73SMaxime Coquelin 	}
343dc65db73SMaxime Coquelin 
344dc65db73SMaxime Coquelin 	/* restore index back to queue index */
345dc65db73SMaxime Coquelin 	addr->index = index;
346dc65db73SMaxime Coquelin 
347dc65db73SMaxime Coquelin 	return 0;
348dc65db73SMaxime Coquelin }
349dc65db73SMaxime Coquelin 
3508723c894SMaxime Coquelin static int
3518723c894SMaxime Coquelin vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused)
3528723c894SMaxime Coquelin {
3538723c894SMaxime Coquelin 	return -ENOTSUP;
3548723c894SMaxime Coquelin }
3558723c894SMaxime Coquelin 
3568723c894SMaxime Coquelin static int
3578723c894SMaxime Coquelin vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused)
3588723c894SMaxime Coquelin {
3598723c894SMaxime Coquelin 	return -ENOTSUP;
3608723c894SMaxime Coquelin }
3618723c894SMaxime Coquelin 
362e3b43481SJianfeng Tan /**
363e3b43481SJianfeng Tan  * Set up environment to talk with a vhost kernel backend.
364e3b43481SJianfeng Tan  *
365e3b43481SJianfeng Tan  * @return
366e3b43481SJianfeng Tan  *   - (-1) if fail to set up;
367e3b43481SJianfeng Tan  *   - (>=0) if successful.
368e3b43481SJianfeng Tan  */
369e3b43481SJianfeng Tan static int
370e3b43481SJianfeng Tan vhost_kernel_setup(struct virtio_user_dev *dev)
371e3b43481SJianfeng Tan {
372e3b43481SJianfeng Tan 	int vhostfd;
37386388a3aSMaxime Coquelin 	uint32_t q, i;
37486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data;
37586388a3aSMaxime Coquelin 
37686388a3aSMaxime Coquelin 	data = malloc(sizeof(*data));
37786388a3aSMaxime Coquelin 	if (!data) {
37886388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path);
37986388a3aSMaxime Coquelin 		return -1;
38086388a3aSMaxime Coquelin 	}
38186388a3aSMaxime Coquelin 
38286388a3aSMaxime Coquelin 	data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
38386388a3aSMaxime Coquelin 	if (!data->vhostfds) {
38486388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path);
38586388a3aSMaxime Coquelin 		goto err_data;
38686388a3aSMaxime Coquelin 	}
38786388a3aSMaxime Coquelin 	data->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
38886388a3aSMaxime Coquelin 	if (!data->tapfds) {
38986388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path);
39086388a3aSMaxime Coquelin 		goto err_vhostfds;
39186388a3aSMaxime Coquelin 	}
39286388a3aSMaxime Coquelin 
39386388a3aSMaxime Coquelin 	for (q = 0; q < dev->max_queue_pairs; ++q) {
39486388a3aSMaxime Coquelin 		data->vhostfds[q] = -1;
39586388a3aSMaxime Coquelin 		data->tapfds[q] = -1;
39686388a3aSMaxime Coquelin 	}
397e3b43481SJianfeng Tan 
398e3b43481SJianfeng Tan 	get_vhost_kernel_max_regions();
399e3b43481SJianfeng Tan 
400e3b43481SJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i) {
401e3b43481SJianfeng Tan 		vhostfd = open(dev->path, O_RDWR);
402e3b43481SJianfeng Tan 		if (vhostfd < 0) {
40386388a3aSMaxime Coquelin 			PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno));
40486388a3aSMaxime Coquelin 			goto err_tapfds;
40586388a3aSMaxime Coquelin 		}
40686388a3aSMaxime Coquelin 
40786388a3aSMaxime Coquelin 		data->vhostfds[i] = vhostfd;
40886388a3aSMaxime Coquelin 	}
40986388a3aSMaxime Coquelin 
41086388a3aSMaxime Coquelin 	dev->backend_data = data;
41186388a3aSMaxime Coquelin 
41286388a3aSMaxime Coquelin 	return 0;
41386388a3aSMaxime Coquelin 
41486388a3aSMaxime Coquelin err_tapfds:
41586388a3aSMaxime Coquelin 	for (i = 0; i < dev->max_queue_pairs; i++)
41686388a3aSMaxime Coquelin 		if (data->vhostfds[i] >= 0)
41786388a3aSMaxime Coquelin 			close(data->vhostfds[i]);
41886388a3aSMaxime Coquelin 
41986388a3aSMaxime Coquelin 	free(data->tapfds);
42086388a3aSMaxime Coquelin err_vhostfds:
42186388a3aSMaxime Coquelin 	free(data->vhostfds);
42286388a3aSMaxime Coquelin err_data:
42386388a3aSMaxime Coquelin 	free(data);
42486388a3aSMaxime Coquelin 
425e3b43481SJianfeng Tan 	return -1;
426e3b43481SJianfeng Tan }
427e3b43481SJianfeng Tan 
428e3b43481SJianfeng Tan static int
42986388a3aSMaxime Coquelin vhost_kernel_destroy(struct virtio_user_dev *dev)
430748e5ea5SMaxime Coquelin {
43186388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
43286388a3aSMaxime Coquelin 	uint32_t i;
43386388a3aSMaxime Coquelin 
43486388a3aSMaxime Coquelin 	if (!data)
43586388a3aSMaxime Coquelin 		return 0;
43686388a3aSMaxime Coquelin 
43786388a3aSMaxime Coquelin 	for (i = 0; i < dev->max_queue_pairs; ++i) {
43886388a3aSMaxime Coquelin 		if (data->vhostfds[i] >= 0)
43986388a3aSMaxime Coquelin 			close(data->vhostfds[i]);
44086388a3aSMaxime Coquelin 		if (data->tapfds[i] >= 0)
44186388a3aSMaxime Coquelin 			close(data->tapfds[i]);
44286388a3aSMaxime Coquelin 	}
44386388a3aSMaxime Coquelin 
44486388a3aSMaxime Coquelin 	free(data->vhostfds);
44586388a3aSMaxime Coquelin 	free(data->tapfds);
44686388a3aSMaxime Coquelin 	free(data);
44786388a3aSMaxime Coquelin 	dev->backend_data = NULL;
44886388a3aSMaxime Coquelin 
449748e5ea5SMaxime Coquelin 	return 0;
450748e5ea5SMaxime Coquelin }
451748e5ea5SMaxime Coquelin 
452748e5ea5SMaxime Coquelin static int
453e3b43481SJianfeng Tan vhost_kernel_set_backend(int vhostfd, int tapfd)
454e3b43481SJianfeng Tan {
455e3b43481SJianfeng Tan 	struct vhost_vring_file f;
456e3b43481SJianfeng Tan 
457e3b43481SJianfeng Tan 	f.fd = tapfd;
458e3b43481SJianfeng Tan 	f.index = 0;
459e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
460e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
461e3b43481SJianfeng Tan 				strerror(errno));
462e3b43481SJianfeng Tan 		return -1;
463e3b43481SJianfeng Tan 	}
464e3b43481SJianfeng Tan 
465e3b43481SJianfeng Tan 	f.index = 1;
466e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
467e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
468e3b43481SJianfeng Tan 				strerror(errno));
469e3b43481SJianfeng Tan 		return -1;
470e3b43481SJianfeng Tan 	}
471e3b43481SJianfeng Tan 
472e3b43481SJianfeng Tan 	return 0;
473e3b43481SJianfeng Tan }
474e3b43481SJianfeng Tan 
475e3b43481SJianfeng Tan static int
476e3b43481SJianfeng Tan vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
477e3b43481SJianfeng Tan 			       uint16_t pair_idx,
478e3b43481SJianfeng Tan 			       int enable)
479e3b43481SJianfeng Tan {
480e3b43481SJianfeng Tan 	int hdr_size;
481e3b43481SJianfeng Tan 	int vhostfd;
482e3b43481SJianfeng Tan 	int tapfd;
483be7a4707SJianfeng Tan 	int req_mq = (dev->max_queue_pairs > 1);
48486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
485e3b43481SJianfeng Tan 
48686388a3aSMaxime Coquelin 	vhostfd = data->vhostfds[pair_idx];
487e3b43481SJianfeng Tan 
48847ac9661STiwei Bie 	if (dev->qp_enabled[pair_idx] == enable)
489e3b43481SJianfeng Tan 		return 0;
49047ac9661STiwei Bie 
49147ac9661STiwei Bie 	if (!enable) {
49286388a3aSMaxime Coquelin 		tapfd = data->tapfds[pair_idx];
49347ac9661STiwei Bie 		if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
49447ac9661STiwei Bie 			PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
49547ac9661STiwei Bie 			return -1;
49647ac9661STiwei Bie 		}
49747ac9661STiwei Bie 		if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) {
49847ac9661STiwei Bie 			PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel");
49947ac9661STiwei Bie 			return -1;
50047ac9661STiwei Bie 		}
50147ac9661STiwei Bie 		dev->qp_enabled[pair_idx] = false;
50247ac9661STiwei Bie 		return 0;
50347ac9661STiwei Bie 	}
50447ac9661STiwei Bie 
50586388a3aSMaxime Coquelin 	if (data->tapfds[pair_idx] >= 0) {
50686388a3aSMaxime Coquelin 		tapfd = data->tapfds[pair_idx];
50747ac9661STiwei Bie 		if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1)
50847ac9661STiwei Bie 			return -1;
50947ac9661STiwei Bie 		if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) {
51047ac9661STiwei Bie 			PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel");
51147ac9661STiwei Bie 			return -1;
51247ac9661STiwei Bie 		}
51347ac9661STiwei Bie 		goto set_backend;
514e3b43481SJianfeng Tan 	}
515e3b43481SJianfeng Tan 
516e3b43481SJianfeng Tan 	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
517e3b43481SJianfeng Tan 	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
518e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
519e3b43481SJianfeng Tan 	else
520e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr);
521e3b43481SJianfeng Tan 
522791b43e0SNing Li 	tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq,
5231db4d233SEric Zhang 			 (char *)dev->mac_addr, dev->features);
524e3b43481SJianfeng Tan 	if (tapfd < 0) {
525e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
526e3b43481SJianfeng Tan 		return -1;
527e3b43481SJianfeng Tan 	}
528e3b43481SJianfeng Tan 
52986388a3aSMaxime Coquelin 	data->tapfds[pair_idx] = tapfd;
53047ac9661STiwei Bie 
53147ac9661STiwei Bie set_backend:
532e3b43481SJianfeng Tan 	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
533e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
534e3b43481SJianfeng Tan 		return -1;
535e3b43481SJianfeng Tan 	}
536e3b43481SJianfeng Tan 
53747ac9661STiwei Bie 	dev->qp_enabled[pair_idx] = true;
538e3b43481SJianfeng Tan 	return 0;
539e3b43481SJianfeng Tan }
540e3b43481SJianfeng Tan 
/*
 * Report backend-specific feature bits: none for vhost-kernel.
 *
 * Stores 0 in *features and returns 0.
 */
static int
vhost_kernel_get_backend_features(uint64_t *features)
{
	const uint64_t none = 0;

	*features = none;
	return 0;
}
5485b75b63cSMaxime Coquelin 
54994973531SMaxime Coquelin static int
55094973531SMaxime Coquelin vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused)
55194973531SMaxime Coquelin {
55294973531SMaxime Coquelin 	/* Nothing to update (Maybe get TAP interface link state?) */
55394973531SMaxime Coquelin 	return 0;
55494973531SMaxime Coquelin }
55594973531SMaxime Coquelin 
55694973531SMaxime Coquelin static int
55794973531SMaxime Coquelin vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused)
55894973531SMaxime Coquelin {
55994973531SMaxime Coquelin 	/* No link state interrupt with Vhost-kernel */
56094973531SMaxime Coquelin 	return -1;
56194973531SMaxime Coquelin }
56294973531SMaxime Coquelin 
563520dd992SFerruh Yigit struct virtio_user_backend_ops virtio_ops_kernel = {
564e3b43481SJianfeng Tan 	.setup = vhost_kernel_setup,
565748e5ea5SMaxime Coquelin 	.destroy = vhost_kernel_destroy,
5665b75b63cSMaxime Coquelin 	.get_backend_features = vhost_kernel_get_backend_features,
56706856cabSMaxime Coquelin 	.set_owner = vhost_kernel_set_owner,
568cc0151b3SMaxime Coquelin 	.get_features = vhost_kernel_get_features,
569cc0151b3SMaxime Coquelin 	.set_features = vhost_kernel_set_features,
570539d910cSMaxime Coquelin 	.set_memory_table = vhost_kernel_set_memory_table,
571ab9098d2SMaxime Coquelin 	.set_vring_num = vhost_kernel_set_vring_num,
572ab9098d2SMaxime Coquelin 	.set_vring_base = vhost_kernel_set_vring_base,
573ab9098d2SMaxime Coquelin 	.get_vring_base = vhost_kernel_get_vring_base,
574ce399c36SMaxime Coquelin 	.set_vring_call = vhost_kernel_set_vring_call,
575ce399c36SMaxime Coquelin 	.set_vring_kick = vhost_kernel_set_vring_kick,
576dc65db73SMaxime Coquelin 	.set_vring_addr = vhost_kernel_set_vring_addr,
5778723c894SMaxime Coquelin 	.get_status = vhost_kernel_get_status,
5788723c894SMaxime Coquelin 	.set_status = vhost_kernel_set_status,
57994973531SMaxime Coquelin 	.enable_qp = vhost_kernel_enable_queue_pair,
58094973531SMaxime Coquelin 	.update_link_state = vhost_kernel_update_link_state,
58194973531SMaxime Coquelin 	.get_intr_fd = vhost_kernel_get_intr_fd,
582e3b43481SJianfeng Tan };
583