xref: /dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c (revision 72b452c5f2599f970f47fd17d3e8e5d60bfebe7a)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Intel Corporation
 */
4e3b43481SJianfeng Tan 
5e3b43481SJianfeng Tan #include <sys/types.h>
6e3b43481SJianfeng Tan #include <sys/stat.h>
7e3b43481SJianfeng Tan #include <fcntl.h>
8e3b43481SJianfeng Tan #include <unistd.h>
906856cabSMaxime Coquelin #include <errno.h>
10*72b452c5SDmitry Kozlyuk #include <stdlib.h>
11e3b43481SJianfeng Tan 
12e3b43481SJianfeng Tan #include <rte_memory.h>
13e3b43481SJianfeng Tan 
14e3b43481SJianfeng Tan #include "vhost.h"
15e3b43481SJianfeng Tan #include "virtio_user_dev.h"
16e3b43481SJianfeng Tan #include "vhost_kernel_tap.h"
17e3b43481SJianfeng Tan 
/* Per-device backend state: two heap-allocated fd arrays, each indexed by
 * queue pair and holding -1 for slots that are not open.
 */
struct vhost_kernel_data {
	int *vhostfds;	/* fds opened on the vhost device path */
	int *tapfds;	/* fds opened on the TAP interface */
};
2286388a3aSMaxime Coquelin 
23e3b43481SJianfeng Tan struct vhost_memory_kernel {
24e3b43481SJianfeng Tan 	uint32_t nregions;
25e3b43481SJianfeng Tan 	uint32_t padding;
26013b4c52SBruce Richardson 	struct vhost_memory_region regions[];
27e3b43481SJianfeng Tan };
28e3b43481SJianfeng Tan 
/* vhost kernel ioctls */
#define VHOST_VIRTIO 0xAF

/* Features and ownership */
#define VHOST_GET_FEATURES	_IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES	_IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER		_IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER	_IO(VHOST_VIRTIO, 0x02)

/* Memory table and logging */
#define VHOST_SET_MEM_TABLE	_IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE	_IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD	_IOW(VHOST_VIRTIO, 0x07, int)

/* Per-vring state */
#define VHOST_SET_VRING_NUM	_IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR	_IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE	_IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE	_IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)

/* Per-vring file descriptors */
#define VHOST_SET_VRING_KICK	_IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL	_IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR	_IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)

/* vhost-net backend attachment */
#define VHOST_NET_SET_BACKEND	_IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
46e3b43481SJianfeng Tan 
/* With the features below, vhost kernel does not need to do the checksum
 * and TSO itself; that information is passed to virtio_user through the
 * virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
	 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
56cc0151b3SMaxime Coquelin 
/* With the features below, for flows going from virtio_user to vhost kernel:
 * (1) if the flow goes up through the kernel networking stack, the checksum
 *     does not need to be verified, which saves CPU cycles;
 * (2) if the flow goes through a Linux bridge and out of an interface
 *     (kernel driver), checksum and TSO are done by GSO in the kernel or
 *     even offloaded to the real physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK \
	((1ULL << VIRTIO_NET_F_HOST_TSO4) | \
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
	 (1ULL << VIRTIO_NET_F_CSUM))
68cc0151b3SMaxime Coquelin 
/* Maximum number of memory regions placed in a vhost memory table.
 * Starts at 64 and is refreshed from sysfs by get_vhost_kernel_max_regions().
 */
static uint64_t max_regions = 64;

/*
 * Refresh max_regions from the vhost module's max_mem_regions parameter.
 *
 * The current value is kept when the sysfs file cannot be opened or read,
 * or when its content is not a usable count: no digits, out of range, zero,
 * or larger than the 32-bit nregions counter of struct vhost_memory_kernel.
 * A zero limit would make every add_memseg_list() call fail, and an
 * oversized one could overflow the memory table allocation size.
 */
static void
get_vhost_kernel_max_regions(void)
{
	char buf[20] = {'\0'};
	char *end = NULL;
	unsigned long long val;
	ssize_t nread;
	int fd;

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	/* Read at most sizeof(buf) - 1 bytes so buf stays NUL-terminated. */
	nread = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (nread <= 0)
		return;

	/* Reset errno after close() so ERANGE can only come from strtoull(). */
	errno = 0;
	val = strtoull(buf, &end, 10);
	if (end == buf || errno == ERANGE || val == 0 || val > UINT32_MAX)
		return;

	max_regions = val;
}
86e3b43481SJianfeng Tan 
8706856cabSMaxime Coquelin static int
vhost_kernel_ioctl(int fd,uint64_t request,void * arg)8806856cabSMaxime Coquelin vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
8906856cabSMaxime Coquelin {
9006856cabSMaxime Coquelin 	int ret;
9106856cabSMaxime Coquelin 
9206856cabSMaxime Coquelin 	ret = ioctl(fd, request, arg);
9306856cabSMaxime Coquelin 	if (ret) {
9406856cabSMaxime Coquelin 		PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
9506856cabSMaxime Coquelin 				request, strerror(errno));
9606856cabSMaxime Coquelin 		return -1;
9706856cabSMaxime Coquelin 	}
9806856cabSMaxime Coquelin 
9906856cabSMaxime Coquelin 	return 0;
10006856cabSMaxime Coquelin }
10106856cabSMaxime Coquelin 
10206856cabSMaxime Coquelin static int
vhost_kernel_set_owner(struct virtio_user_dev * dev)10306856cabSMaxime Coquelin vhost_kernel_set_owner(struct virtio_user_dev *dev)
10406856cabSMaxime Coquelin {
1053c503b24SThierry Herbelot 	int ret;
1063c503b24SThierry Herbelot 	uint32_t i;
10786388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
10886388a3aSMaxime Coquelin 
1093c503b24SThierry Herbelot 	for (i = 0; i < dev->max_queue_pairs; ++i) {
1103c503b24SThierry Herbelot 		if (data->vhostfds[i] < 0)
1113c503b24SThierry Herbelot 			continue;
1123c503b24SThierry Herbelot 
1133c503b24SThierry Herbelot 		ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_OWNER, NULL);
1143c503b24SThierry Herbelot 		if (ret < 0)
1153c503b24SThierry Herbelot 			return ret;
1163c503b24SThierry Herbelot 	}
1173c503b24SThierry Herbelot 
1183c503b24SThierry Herbelot 	return 0;
11906856cabSMaxime Coquelin }
12006856cabSMaxime Coquelin 
121cc0151b3SMaxime Coquelin static int
vhost_kernel_get_features(struct virtio_user_dev * dev,uint64_t * features)122cc0151b3SMaxime Coquelin vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
123cc0151b3SMaxime Coquelin {
12486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
125b72099beSDavid Marchand 	unsigned int tap_flags;
126b72099beSDavid Marchand 	int ret;
127cc0151b3SMaxime Coquelin 
12886388a3aSMaxime Coquelin 	ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features);
129cc0151b3SMaxime Coquelin 	if (ret < 0) {
130cc0151b3SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to get features");
131cc0151b3SMaxime Coquelin 		return -1;
132cc0151b3SMaxime Coquelin 	}
133cc0151b3SMaxime Coquelin 
134b72099beSDavid Marchand 	ret = tap_get_flags(data->tapfds[0], &tap_flags);
135cc0151b3SMaxime Coquelin 	if (ret < 0) {
136cc0151b3SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to get TAP features");
137cc0151b3SMaxime Coquelin 		return -1;
138cc0151b3SMaxime Coquelin 	}
139cc0151b3SMaxime Coquelin 
140cc0151b3SMaxime Coquelin 	/* with tap as the backend, all these features are supported
141cc0151b3SMaxime Coquelin 	 * but not claimed by vhost-net, so we add them back when
142cc0151b3SMaxime Coquelin 	 * reporting to upper layer.
143cc0151b3SMaxime Coquelin 	 */
144b72099beSDavid Marchand 	if (tap_flags & IFF_VNET_HDR) {
145cc0151b3SMaxime Coquelin 		*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
146cc0151b3SMaxime Coquelin 		*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
147cc0151b3SMaxime Coquelin 	}
148cc0151b3SMaxime Coquelin 
149cc0151b3SMaxime Coquelin 	/* vhost_kernel will not declare this feature, but it does
150cc0151b3SMaxime Coquelin 	 * support multi-queue.
151cc0151b3SMaxime Coquelin 	 */
152b72099beSDavid Marchand 	if (tap_flags & IFF_MULTI_QUEUE)
153cc0151b3SMaxime Coquelin 		*features |= (1ull << VIRTIO_NET_F_MQ);
154cc0151b3SMaxime Coquelin 
155cc0151b3SMaxime Coquelin 	return 0;
156cc0151b3SMaxime Coquelin }
157cc0151b3SMaxime Coquelin 
158cc0151b3SMaxime Coquelin static int
vhost_kernel_set_features(struct virtio_user_dev * dev,uint64_t features)159cc0151b3SMaxime Coquelin vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
160cc0151b3SMaxime Coquelin {
16186388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
162cf412ff7SThierry Herbelot 	uint32_t i;
163cf412ff7SThierry Herbelot 	int ret;
16486388a3aSMaxime Coquelin 
165cc0151b3SMaxime Coquelin 	/* We don't need memory protection here */
166cc0151b3SMaxime Coquelin 	features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
167cc0151b3SMaxime Coquelin 	/* VHOST kernel does not know about below flags */
168cc0151b3SMaxime Coquelin 	features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
169cc0151b3SMaxime Coquelin 	features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
170cc0151b3SMaxime Coquelin 	features &= ~(1ULL << VIRTIO_NET_F_MQ);
171cc0151b3SMaxime Coquelin 
172cf412ff7SThierry Herbelot 	for (i = 0; i < dev->max_queue_pairs; ++i) {
173cf412ff7SThierry Herbelot 		if (data->vhostfds[i] < 0)
174cf412ff7SThierry Herbelot 			continue;
175cf412ff7SThierry Herbelot 
176cf412ff7SThierry Herbelot 		ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_FEATURES, &features);
177cf412ff7SThierry Herbelot 		if (ret < 0)
178cf412ff7SThierry Herbelot 			return ret;
179cf412ff7SThierry Herbelot 	}
180cf412ff7SThierry Herbelot 
181cf412ff7SThierry Herbelot 	return 0;
182cc0151b3SMaxime Coquelin }
183cc0151b3SMaxime Coquelin 
184746c346dSAnatoly Burakov static int
add_memseg_list(const struct rte_memseg_list * msl,void * arg)18575728654STiwei Bie add_memseg_list(const struct rte_memseg_list *msl, void *arg)
186746c346dSAnatoly Burakov {
18775728654STiwei Bie 	struct vhost_memory_kernel *vm = arg;
188746c346dSAnatoly Burakov 	struct vhost_memory_region *mr;
189746c346dSAnatoly Burakov 	void *start_addr;
19075728654STiwei Bie 	uint64_t len;
191746c346dSAnatoly Burakov 
1925282bb1cSAnatoly Burakov 	if (msl->external)
1935282bb1cSAnatoly Burakov 		return 0;
1945282bb1cSAnatoly Burakov 
19575728654STiwei Bie 	if (vm->nregions >= max_regions)
196746c346dSAnatoly Burakov 		return -1;
197746c346dSAnatoly Burakov 
19875728654STiwei Bie 	start_addr = msl->base_va;
19975728654STiwei Bie 	len = msl->page_sz * msl->memseg_arr.len;
20075728654STiwei Bie 
20175728654STiwei Bie 	mr = &vm->regions[vm->nregions++];
202746c346dSAnatoly Burakov 
203746c346dSAnatoly Burakov 	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
204746c346dSAnatoly Burakov 	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
205746c346dSAnatoly Burakov 	mr->memory_size = len;
20675728654STiwei Bie 	mr->mmap_offset = 0; /* flags_padding */
20775728654STiwei Bie 
20875728654STiwei Bie 	PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
20975728654STiwei Bie 			vm->nregions - 1, start_addr, len);
210746c346dSAnatoly Burakov 
211746c346dSAnatoly Burakov 	return 0;
212746c346dSAnatoly Burakov }
213746c346dSAnatoly Burakov 
21475728654STiwei Bie /* By default, vhost kernel module allows 64 regions, but DPDK may
21575728654STiwei Bie  * have much more memory regions. Below function will treat each
21675728654STiwei Bie  * contiguous memory space reserved by DPDK as one region.
217e3b43481SJianfeng Tan  */
218539d910cSMaxime Coquelin static int
vhost_kernel_set_memory_table(struct virtio_user_dev * dev)219539d910cSMaxime Coquelin vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
220e3b43481SJianfeng Tan {
221b01e1e9aSThierry Herbelot 	uint32_t i;
22286388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
223e3b43481SJianfeng Tan 	struct vhost_memory_kernel *vm;
224539d910cSMaxime Coquelin 	int ret;
225e3b43481SJianfeng Tan 
226e3b43481SJianfeng Tan 	vm = malloc(sizeof(struct vhost_memory_kernel) +
227e3b43481SJianfeng Tan 			max_regions *
228e3b43481SJianfeng Tan 			sizeof(struct vhost_memory_region));
2291e9057a9SJianfeng Tan 	if (!vm)
230539d910cSMaxime Coquelin 		goto err;
231e3b43481SJianfeng Tan 
23275728654STiwei Bie 	vm->nregions = 0;
23375728654STiwei Bie 	vm->padding = 0;
234e3b43481SJianfeng Tan 
2357ff26957STiwei Bie 	/*
2367ff26957STiwei Bie 	 * The memory lock has already been taken by memory subsystem
2377ff26957STiwei Bie 	 * or virtio_user_start_device().
2387ff26957STiwei Bie 	 */
239539d910cSMaxime Coquelin 	ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
240539d910cSMaxime Coquelin 	if (ret < 0)
241539d910cSMaxime Coquelin 		goto err_free;
242539d910cSMaxime Coquelin 
243b01e1e9aSThierry Herbelot 	for (i = 0; i < dev->max_queue_pairs; ++i) {
244b01e1e9aSThierry Herbelot 		if (data->vhostfds[i] < 0)
245b01e1e9aSThierry Herbelot 			continue;
246b01e1e9aSThierry Herbelot 
247b01e1e9aSThierry Herbelot 		ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_MEM_TABLE, vm);
248539d910cSMaxime Coquelin 		if (ret < 0)
249539d910cSMaxime Coquelin 			goto err_free;
250b01e1e9aSThierry Herbelot 	}
251539d910cSMaxime Coquelin 
252e3b43481SJianfeng Tan 	free(vm);
253539d910cSMaxime Coquelin 
254539d910cSMaxime Coquelin 	return 0;
255539d910cSMaxime Coquelin err_free:
256539d910cSMaxime Coquelin 	free(vm);
257539d910cSMaxime Coquelin err:
258539d910cSMaxime Coquelin 	PMD_DRV_LOG(ERR, "Failed to set memory table");
259539d910cSMaxime Coquelin 	return -1;
260e3b43481SJianfeng Tan }
261e3b43481SJianfeng Tan 
262ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring(struct virtio_user_dev * dev,uint64_t req,struct vhost_vring_state * state)263ab9098d2SMaxime Coquelin vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state)
264ab9098d2SMaxime Coquelin {
265ab9098d2SMaxime Coquelin 	int ret, fd;
266ab9098d2SMaxime Coquelin 	unsigned int index = state->index;
26786388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
268ab9098d2SMaxime Coquelin 
269ab9098d2SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
27086388a3aSMaxime Coquelin 	fd = data->vhostfds[state->index / 2];
271ab9098d2SMaxime Coquelin 	state->index %= 2;
272ab9098d2SMaxime Coquelin 
273ab9098d2SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, req, state);
274ab9098d2SMaxime Coquelin 	if (ret < 0) {
275ab9098d2SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req);
276ab9098d2SMaxime Coquelin 		return -1;
277ab9098d2SMaxime Coquelin 	}
278ab9098d2SMaxime Coquelin 
279ab9098d2SMaxime Coquelin 	/* restore index back to queue index */
280ab9098d2SMaxime Coquelin 	state->index = index;
281ab9098d2SMaxime Coquelin 
282ab9098d2SMaxime Coquelin 	return 0;
283ab9098d2SMaxime Coquelin }
284ab9098d2SMaxime Coquelin 
285ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring_num(struct virtio_user_dev * dev,struct vhost_vring_state * state)286ab9098d2SMaxime Coquelin vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
287ab9098d2SMaxime Coquelin {
288ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state);
289ab9098d2SMaxime Coquelin }
290ab9098d2SMaxime Coquelin 
291ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring_base(struct virtio_user_dev * dev,struct vhost_vring_state * state)292ab9098d2SMaxime Coquelin vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
293ab9098d2SMaxime Coquelin {
294ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state);
295ab9098d2SMaxime Coquelin }
296ab9098d2SMaxime Coquelin 
297ab9098d2SMaxime Coquelin static int
vhost_kernel_get_vring_base(struct virtio_user_dev * dev,struct vhost_vring_state * state)298ab9098d2SMaxime Coquelin vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
299ab9098d2SMaxime Coquelin {
300ab9098d2SMaxime Coquelin 	return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state);
301ab9098d2SMaxime Coquelin }
302ab9098d2SMaxime Coquelin 
303ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_file(struct virtio_user_dev * dev,uint64_t req,struct vhost_vring_file * file)304ce399c36SMaxime Coquelin vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req,
305ce399c36SMaxime Coquelin 		struct vhost_vring_file *file)
306ce399c36SMaxime Coquelin {
307ce399c36SMaxime Coquelin 	int ret, fd;
308ce399c36SMaxime Coquelin 	unsigned int index = file->index;
30986388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
310ce399c36SMaxime Coquelin 
311ce399c36SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
31286388a3aSMaxime Coquelin 	fd = data->vhostfds[file->index / 2];
313ce399c36SMaxime Coquelin 	file->index %= 2;
314ce399c36SMaxime Coquelin 
315ce399c36SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, req, file);
316ce399c36SMaxime Coquelin 	if (ret < 0) {
317ce399c36SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req);
318ce399c36SMaxime Coquelin 		return -1;
319ce399c36SMaxime Coquelin 	}
320ce399c36SMaxime Coquelin 
321ce399c36SMaxime Coquelin 	/* restore index back to queue index */
322ce399c36SMaxime Coquelin 	file->index = index;
323ce399c36SMaxime Coquelin 
324ce399c36SMaxime Coquelin 	return 0;
325ce399c36SMaxime Coquelin }
326ce399c36SMaxime Coquelin 
327ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_kick(struct virtio_user_dev * dev,struct vhost_vring_file * file)328ce399c36SMaxime Coquelin vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
329ce399c36SMaxime Coquelin {
330ce399c36SMaxime Coquelin 	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file);
331ce399c36SMaxime Coquelin }
332ce399c36SMaxime Coquelin 
333ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_call(struct virtio_user_dev * dev,struct vhost_vring_file * file)334ce399c36SMaxime Coquelin vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
335ce399c36SMaxime Coquelin {
336ce399c36SMaxime Coquelin 	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file);
337ce399c36SMaxime Coquelin }
338ce399c36SMaxime Coquelin 
339dc65db73SMaxime Coquelin static int
vhost_kernel_set_vring_addr(struct virtio_user_dev * dev,struct vhost_vring_addr * addr)340dc65db73SMaxime Coquelin vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
341dc65db73SMaxime Coquelin {
342dc65db73SMaxime Coquelin 	int ret, fd;
343dc65db73SMaxime Coquelin 	unsigned int index = addr->index;
34486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
345dc65db73SMaxime Coquelin 
346dc65db73SMaxime Coquelin 	/* Convert from queue index to queue-pair & offset */
34786388a3aSMaxime Coquelin 	fd = data->vhostfds[addr->index / 2];
348dc65db73SMaxime Coquelin 	addr->index %= 2;
349dc65db73SMaxime Coquelin 
350dc65db73SMaxime Coquelin 	ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr);
351dc65db73SMaxime Coquelin 	if (ret < 0) {
352dc65db73SMaxime Coquelin 		PMD_DRV_LOG(ERR, "Failed to set vring address");
353dc65db73SMaxime Coquelin 		return -1;
354dc65db73SMaxime Coquelin 	}
355dc65db73SMaxime Coquelin 
356dc65db73SMaxime Coquelin 	/* restore index back to queue index */
357dc65db73SMaxime Coquelin 	addr->index = index;
358dc65db73SMaxime Coquelin 
359dc65db73SMaxime Coquelin 	return 0;
360dc65db73SMaxime Coquelin }
361dc65db73SMaxime Coquelin 
3628723c894SMaxime Coquelin static int
vhost_kernel_get_status(struct virtio_user_dev * dev __rte_unused,uint8_t * status __rte_unused)3638723c894SMaxime Coquelin vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused)
3648723c894SMaxime Coquelin {
3658723c894SMaxime Coquelin 	return -ENOTSUP;
3668723c894SMaxime Coquelin }
3678723c894SMaxime Coquelin 
3688723c894SMaxime Coquelin static int
vhost_kernel_set_status(struct virtio_user_dev * dev __rte_unused,uint8_t status __rte_unused)3698723c894SMaxime Coquelin vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused)
3708723c894SMaxime Coquelin {
3718723c894SMaxime Coquelin 	return -ENOTSUP;
3728723c894SMaxime Coquelin }
3738723c894SMaxime Coquelin 
374e3b43481SJianfeng Tan /**
375e3b43481SJianfeng Tan  * Set up environment to talk with a vhost kernel backend.
376e3b43481SJianfeng Tan  *
377e3b43481SJianfeng Tan  * @return
378e3b43481SJianfeng Tan  *   - (-1) if fail to set up;
379e3b43481SJianfeng Tan  *   - (>=0) if successful.
380e3b43481SJianfeng Tan  */
381e3b43481SJianfeng Tan static int
vhost_kernel_setup(struct virtio_user_dev * dev)382e3b43481SJianfeng Tan vhost_kernel_setup(struct virtio_user_dev *dev)
383e3b43481SJianfeng Tan {
38486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data;
385b72099beSDavid Marchand 	unsigned int tap_features;
386b72099beSDavid Marchand 	unsigned int tap_flags;
387666ff776SHarold Huang 	unsigned int r_flags;
388b72099beSDavid Marchand 	const char *ifname;
389b72099beSDavid Marchand 	uint32_t q, i;
390b72099beSDavid Marchand 	int vhostfd;
391b72099beSDavid Marchand 
392b72099beSDavid Marchand 	if (tap_support_features(&tap_features) < 0)
393b72099beSDavid Marchand 		return -1;
394b72099beSDavid Marchand 
395b72099beSDavid Marchand 	if ((tap_features & IFF_VNET_HDR) == 0) {
396b72099beSDavid Marchand 		PMD_INIT_LOG(ERR, "TAP does not support IFF_VNET_HDR");
397b72099beSDavid Marchand 		return -1;
398b72099beSDavid Marchand 	}
399666ff776SHarold Huang 	r_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
400666ff776SHarold Huang 
401666ff776SHarold Huang 	if (tap_features & IFF_NAPI)
402666ff776SHarold Huang 		r_flags |= IFF_NAPI;
40386388a3aSMaxime Coquelin 
40486388a3aSMaxime Coquelin 	data = malloc(sizeof(*data));
40586388a3aSMaxime Coquelin 	if (!data) {
40686388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path);
40786388a3aSMaxime Coquelin 		return -1;
40886388a3aSMaxime Coquelin 	}
40986388a3aSMaxime Coquelin 
41086388a3aSMaxime Coquelin 	data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
41186388a3aSMaxime Coquelin 	if (!data->vhostfds) {
41286388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path);
41386388a3aSMaxime Coquelin 		goto err_data;
41486388a3aSMaxime Coquelin 	}
41586388a3aSMaxime Coquelin 	data->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
41686388a3aSMaxime Coquelin 	if (!data->tapfds) {
41786388a3aSMaxime Coquelin 		PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path);
41886388a3aSMaxime Coquelin 		goto err_vhostfds;
41986388a3aSMaxime Coquelin 	}
42086388a3aSMaxime Coquelin 
42186388a3aSMaxime Coquelin 	for (q = 0; q < dev->max_queue_pairs; ++q) {
42286388a3aSMaxime Coquelin 		data->vhostfds[q] = -1;
42386388a3aSMaxime Coquelin 		data->tapfds[q] = -1;
42486388a3aSMaxime Coquelin 	}
425e3b43481SJianfeng Tan 
426e3b43481SJianfeng Tan 	get_vhost_kernel_max_regions();
427e3b43481SJianfeng Tan 
428e3b43481SJianfeng Tan 	for (i = 0; i < dev->max_queue_pairs; ++i) {
429e3b43481SJianfeng Tan 		vhostfd = open(dev->path, O_RDWR);
430e3b43481SJianfeng Tan 		if (vhostfd < 0) {
43186388a3aSMaxime Coquelin 			PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno));
43286388a3aSMaxime Coquelin 			goto err_tapfds;
43386388a3aSMaxime Coquelin 		}
43486388a3aSMaxime Coquelin 		data->vhostfds[i] = vhostfd;
43586388a3aSMaxime Coquelin 	}
43686388a3aSMaxime Coquelin 
437b72099beSDavid Marchand 	ifname = dev->ifname != NULL ? dev->ifname : "tap%d";
438666ff776SHarold Huang 	data->tapfds[0] = tap_open(ifname, r_flags, (tap_features & IFF_MULTI_QUEUE) != 0);
439b72099beSDavid Marchand 	if (data->tapfds[0] < 0)
440b72099beSDavid Marchand 		goto err_tapfds;
441b72099beSDavid Marchand 	if (dev->ifname == NULL && tap_get_name(data->tapfds[0], &dev->ifname) < 0) {
442b72099beSDavid Marchand 		PMD_DRV_LOG(ERR, "fail to get tap name (%d)", data->tapfds[0]);
443b72099beSDavid Marchand 		goto err_tapfds;
444b72099beSDavid Marchand 	}
445b72099beSDavid Marchand 	if (tap_get_flags(data->tapfds[0], &tap_flags) < 0) {
446b72099beSDavid Marchand 		PMD_DRV_LOG(ERR, "fail to get tap flags for tap %s", dev->ifname);
447b72099beSDavid Marchand 		goto err_tapfds;
448b72099beSDavid Marchand 	}
449b72099beSDavid Marchand 	if ((tap_flags & IFF_MULTI_QUEUE) == 0 && dev->max_queue_pairs > 1) {
450b72099beSDavid Marchand 		PMD_DRV_LOG(ERR, "tap %s does not support multi queue", dev->ifname);
451b72099beSDavid Marchand 		goto err_tapfds;
452b72099beSDavid Marchand 	}
453b72099beSDavid Marchand 
454b72099beSDavid Marchand 	for (i = 1; i < dev->max_queue_pairs; i++) {
455666ff776SHarold Huang 		data->tapfds[i] = tap_open(dev->ifname, r_flags, true);
456b72099beSDavid Marchand 		if (data->tapfds[i] < 0)
457b72099beSDavid Marchand 			goto err_tapfds;
458b72099beSDavid Marchand 	}
459b72099beSDavid Marchand 
46086388a3aSMaxime Coquelin 	dev->backend_data = data;
46186388a3aSMaxime Coquelin 
46286388a3aSMaxime Coquelin 	return 0;
46386388a3aSMaxime Coquelin 
46486388a3aSMaxime Coquelin err_tapfds:
465b72099beSDavid Marchand 	for (i = 0; i < dev->max_queue_pairs; i++) {
46686388a3aSMaxime Coquelin 		if (data->vhostfds[i] >= 0)
46786388a3aSMaxime Coquelin 			close(data->vhostfds[i]);
468b72099beSDavid Marchand 		if (data->tapfds[i] >= 0)
469b72099beSDavid Marchand 			close(data->tapfds[i]);
470b72099beSDavid Marchand 	}
47186388a3aSMaxime Coquelin 
47286388a3aSMaxime Coquelin 	free(data->tapfds);
47386388a3aSMaxime Coquelin err_vhostfds:
47486388a3aSMaxime Coquelin 	free(data->vhostfds);
47586388a3aSMaxime Coquelin err_data:
47686388a3aSMaxime Coquelin 	free(data);
47786388a3aSMaxime Coquelin 
478e3b43481SJianfeng Tan 	return -1;
479e3b43481SJianfeng Tan }
480e3b43481SJianfeng Tan 
481e3b43481SJianfeng Tan static int
vhost_kernel_destroy(struct virtio_user_dev * dev)48286388a3aSMaxime Coquelin vhost_kernel_destroy(struct virtio_user_dev *dev)
483748e5ea5SMaxime Coquelin {
48486388a3aSMaxime Coquelin 	struct vhost_kernel_data *data = dev->backend_data;
48586388a3aSMaxime Coquelin 	uint32_t i;
48686388a3aSMaxime Coquelin 
48786388a3aSMaxime Coquelin 	if (!data)
48886388a3aSMaxime Coquelin 		return 0;
48986388a3aSMaxime Coquelin 
49086388a3aSMaxime Coquelin 	for (i = 0; i < dev->max_queue_pairs; ++i) {
49186388a3aSMaxime Coquelin 		if (data->vhostfds[i] >= 0)
49286388a3aSMaxime Coquelin 			close(data->vhostfds[i]);
49386388a3aSMaxime Coquelin 		if (data->tapfds[i] >= 0)
49486388a3aSMaxime Coquelin 			close(data->tapfds[i]);
49586388a3aSMaxime Coquelin 	}
49686388a3aSMaxime Coquelin 
49786388a3aSMaxime Coquelin 	free(data->vhostfds);
49886388a3aSMaxime Coquelin 	free(data->tapfds);
49986388a3aSMaxime Coquelin 	free(data);
50086388a3aSMaxime Coquelin 	dev->backend_data = NULL;
50186388a3aSMaxime Coquelin 
502748e5ea5SMaxime Coquelin 	return 0;
503748e5ea5SMaxime Coquelin }
504748e5ea5SMaxime Coquelin 
505748e5ea5SMaxime Coquelin static int
vhost_kernel_set_backend(int vhostfd,int tapfd)506e3b43481SJianfeng Tan vhost_kernel_set_backend(int vhostfd, int tapfd)
507e3b43481SJianfeng Tan {
508e3b43481SJianfeng Tan 	struct vhost_vring_file f;
509e3b43481SJianfeng Tan 
510e3b43481SJianfeng Tan 	f.fd = tapfd;
511e3b43481SJianfeng Tan 	f.index = 0;
512e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
513e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
514e3b43481SJianfeng Tan 				strerror(errno));
515e3b43481SJianfeng Tan 		return -1;
516e3b43481SJianfeng Tan 	}
517e3b43481SJianfeng Tan 
518e3b43481SJianfeng Tan 	f.index = 1;
519e3b43481SJianfeng Tan 	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
520e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
521e3b43481SJianfeng Tan 				strerror(errno));
522e3b43481SJianfeng Tan 		return -1;
523e3b43481SJianfeng Tan 	}
524e3b43481SJianfeng Tan 
525e3b43481SJianfeng Tan 	return 0;
526e3b43481SJianfeng Tan }
527e3b43481SJianfeng Tan 
528e3b43481SJianfeng Tan static int
vhost_kernel_enable_queue_pair(struct virtio_user_dev * dev,uint16_t pair_idx,int enable)529e3b43481SJianfeng Tan vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
530e3b43481SJianfeng Tan 			       uint16_t pair_idx,
531e3b43481SJianfeng Tan 			       int enable)
532e3b43481SJianfeng Tan {
533b72099beSDavid Marchand 	struct vhost_kernel_data *data = dev->backend_data;
534e3b43481SJianfeng Tan 	int hdr_size;
535e3b43481SJianfeng Tan 	int vhostfd;
536e3b43481SJianfeng Tan 	int tapfd;
537e3b43481SJianfeng Tan 
53847ac9661STiwei Bie 	if (dev->qp_enabled[pair_idx] == enable)
539e3b43481SJianfeng Tan 		return 0;
54047ac9661STiwei Bie 
541b72099beSDavid Marchand 	vhostfd = data->vhostfds[pair_idx];
54286388a3aSMaxime Coquelin 	tapfd = data->tapfds[pair_idx];
543b72099beSDavid Marchand 
544b72099beSDavid Marchand 	if (!enable) {
54547ac9661STiwei Bie 		if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
54647ac9661STiwei Bie 			PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
54747ac9661STiwei Bie 			return -1;
54847ac9661STiwei Bie 		}
54947ac9661STiwei Bie 		dev->qp_enabled[pair_idx] = false;
55047ac9661STiwei Bie 		return 0;
55147ac9661STiwei Bie 	}
55247ac9661STiwei Bie 
553e3b43481SJianfeng Tan 	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
554e3b43481SJianfeng Tan 	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
555e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
556e3b43481SJianfeng Tan 	else
557e3b43481SJianfeng Tan 		hdr_size = sizeof(struct virtio_net_hdr);
558e3b43481SJianfeng Tan 
559b72099beSDavid Marchand 	/* Set mac on tap only once when starting */
560b72099beSDavid Marchand 	if (!dev->started && pair_idx == 0 &&
561b72099beSDavid Marchand 			tap_set_mac(data->tapfds[pair_idx], dev->mac_addr) < 0)
562b72099beSDavid Marchand 		return -1;
563b72099beSDavid Marchand 
564b72099beSDavid Marchand 	if (vhost_kernel_tap_setup(tapfd, hdr_size, dev->features) < 0) {
565b72099beSDavid Marchand 		PMD_DRV_LOG(ERR, "fail to setup tap for vhost kernel");
566e3b43481SJianfeng Tan 		return -1;
567e3b43481SJianfeng Tan 	}
568e3b43481SJianfeng Tan 
569e3b43481SJianfeng Tan 	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
570e3b43481SJianfeng Tan 		PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
571e3b43481SJianfeng Tan 		return -1;
572e3b43481SJianfeng Tan 	}
573e3b43481SJianfeng Tan 
57447ac9661STiwei Bie 	dev->qp_enabled[pair_idx] = true;
575e3b43481SJianfeng Tan 	return 0;
576e3b43481SJianfeng Tan }
577e3b43481SJianfeng Tan 
/* Report the backend feature set, which is always empty here: stores 0 in
 * *features and returns 0.
 */
static int
vhost_kernel_get_backend_features(uint64_t *features)
{
	const uint64_t none = 0;

	*features = none;
	return 0;
}
5855b75b63cSMaxime Coquelin 
58694973531SMaxime Coquelin static int
vhost_kernel_update_link_state(struct virtio_user_dev * dev __rte_unused)58794973531SMaxime Coquelin vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused)
58894973531SMaxime Coquelin {
58994973531SMaxime Coquelin 	/* Nothing to update (Maybe get TAP interface link state?) */
59094973531SMaxime Coquelin 	return 0;
59194973531SMaxime Coquelin }
59294973531SMaxime Coquelin 
59394973531SMaxime Coquelin static int
vhost_kernel_get_intr_fd(struct virtio_user_dev * dev __rte_unused)59494973531SMaxime Coquelin vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused)
59594973531SMaxime Coquelin {
59694973531SMaxime Coquelin 	/* No link state interrupt with Vhost-kernel */
59794973531SMaxime Coquelin 	return -1;
59894973531SMaxime Coquelin }
59994973531SMaxime Coquelin 
600520dd992SFerruh Yigit struct virtio_user_backend_ops virtio_ops_kernel = {
601e3b43481SJianfeng Tan 	.setup = vhost_kernel_setup,
602748e5ea5SMaxime Coquelin 	.destroy = vhost_kernel_destroy,
6035b75b63cSMaxime Coquelin 	.get_backend_features = vhost_kernel_get_backend_features,
60406856cabSMaxime Coquelin 	.set_owner = vhost_kernel_set_owner,
605cc0151b3SMaxime Coquelin 	.get_features = vhost_kernel_get_features,
606cc0151b3SMaxime Coquelin 	.set_features = vhost_kernel_set_features,
607539d910cSMaxime Coquelin 	.set_memory_table = vhost_kernel_set_memory_table,
608ab9098d2SMaxime Coquelin 	.set_vring_num = vhost_kernel_set_vring_num,
609ab9098d2SMaxime Coquelin 	.set_vring_base = vhost_kernel_set_vring_base,
610ab9098d2SMaxime Coquelin 	.get_vring_base = vhost_kernel_get_vring_base,
611ce399c36SMaxime Coquelin 	.set_vring_call = vhost_kernel_set_vring_call,
612ce399c36SMaxime Coquelin 	.set_vring_kick = vhost_kernel_set_vring_kick,
613dc65db73SMaxime Coquelin 	.set_vring_addr = vhost_kernel_set_vring_addr,
6148723c894SMaxime Coquelin 	.get_status = vhost_kernel_get_status,
6158723c894SMaxime Coquelin 	.set_status = vhost_kernel_set_status,
61694973531SMaxime Coquelin 	.enable_qp = vhost_kernel_enable_queue_pair,
61794973531SMaxime Coquelin 	.update_link_state = vhost_kernel_update_link_state,
61894973531SMaxime Coquelin 	.get_intr_fd = vhost_kernel_get_intr_fd,
619e3b43481SJianfeng Tan };
620