15566a3e3SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
25566a3e3SBruce Richardson * Copyright(c) 2016 Intel Corporation
3e3b43481SJianfeng Tan */
4e3b43481SJianfeng Tan
5e3b43481SJianfeng Tan #include <sys/types.h>
6e3b43481SJianfeng Tan #include <sys/stat.h>
7e3b43481SJianfeng Tan #include <fcntl.h>
8e3b43481SJianfeng Tan #include <unistd.h>
906856cabSMaxime Coquelin #include <errno.h>
10*72b452c5SDmitry Kozlyuk #include <stdlib.h>
11e3b43481SJianfeng Tan
12e3b43481SJianfeng Tan #include <rte_memory.h>
13e3b43481SJianfeng Tan
14e3b43481SJianfeng Tan #include "vhost.h"
15e3b43481SJianfeng Tan #include "virtio_user_dev.h"
16e3b43481SJianfeng Tan #include "vhost_kernel_tap.h"
17e3b43481SJianfeng Tan
/* Private state the vhost-kernel backend hangs off dev->backend_data. */
struct vhost_kernel_data {
	int *vhostfds;	/* one fd per queue pair, opened on dev->path; -1 if unset */
	int *tapfds;	/* one TAP fd per queue pair; -1 if unset */
};
2286388a3aSMaxime Coquelin
23e3b43481SJianfeng Tan struct vhost_memory_kernel {
24e3b43481SJianfeng Tan uint32_t nregions;
25e3b43481SJianfeng Tan uint32_t padding;
26013b4c52SBruce Richardson struct vhost_memory_region regions[];
27e3b43481SJianfeng Tan };
28e3b43481SJianfeng Tan
/* vhost kernel ioctls */
#define VHOST_VIRTIO		0xAF
#define VHOST_GET_FEATURES	_IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES	_IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER		_IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER	_IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_MEM_TABLE	_IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE	_IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD	_IOW(VHOST_VIRTIO, 0x07, int)
#define VHOST_SET_VRING_NUM	_IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR	_IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
/* SET and GET share command number 0x12; only the direction differs. */
#define VHOST_SET_VRING_BASE	_IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE	_IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK	_IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL	_IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR	_IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
#define VHOST_NET_SET_BACKEND	_IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
46e3b43481SJianfeng Tan
/*
 * With the features below, vhost kernel does not need to do checksum and
 * TSO itself; this information is passed to virtio_user through the
 * virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
56cc0151b3SMaxime Coquelin
/*
 * With the features below, for flows going from virtio_user to vhost kernel:
 * (1) if a flow goes up through the kernel networking stack, the checksum
 *     does not need to be verified, which saves CPU cycles;
 * (2) if a flow goes through a Linux bridge and out of an interface
 *     (kernel driver), checksum and TSO are done by GSO in the kernel or
 *     even offloaded to the real physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_CSUM))
68cc0151b3SMaxime Coquelin
/* Upper bound on the number of regions put in the vhost memory table. */
static uint64_t max_regions = 64;

/*
 * Refresh max_regions from the vhost module parameter exposed in sysfs.
 *
 * max_regions is left untouched when the file cannot be opened or read,
 * or when its content does not parse as a decimal number. Previously an
 * unparsable value silently turned the limit into 0, which makes every
 * later memory table setup fail.
 */
static void
get_vhost_kernel_max_regions(void)
{
	char buf[20] = {'\0'};
	char *end = NULL;
	unsigned long long val;
	ssize_t nread;
	int fd;

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	/* Read at most sizeof(buf) - 1 bytes so buf stays NUL-terminated. */
	nread = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (nread <= 0)
		return;

	errno = 0;
	val = strtoull(buf, &end, 10);
	/* No digits consumed or value out of range: keep the current limit. */
	if (end == buf || errno == ERANGE)
		return;

	max_regions = val;
}
86e3b43481SJianfeng Tan
8706856cabSMaxime Coquelin static int
vhost_kernel_ioctl(int fd,uint64_t request,void * arg)8806856cabSMaxime Coquelin vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
8906856cabSMaxime Coquelin {
9006856cabSMaxime Coquelin int ret;
9106856cabSMaxime Coquelin
9206856cabSMaxime Coquelin ret = ioctl(fd, request, arg);
9306856cabSMaxime Coquelin if (ret) {
9406856cabSMaxime Coquelin PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)",
9506856cabSMaxime Coquelin request, strerror(errno));
9606856cabSMaxime Coquelin return -1;
9706856cabSMaxime Coquelin }
9806856cabSMaxime Coquelin
9906856cabSMaxime Coquelin return 0;
10006856cabSMaxime Coquelin }
10106856cabSMaxime Coquelin
10206856cabSMaxime Coquelin static int
vhost_kernel_set_owner(struct virtio_user_dev * dev)10306856cabSMaxime Coquelin vhost_kernel_set_owner(struct virtio_user_dev *dev)
10406856cabSMaxime Coquelin {
1053c503b24SThierry Herbelot int ret;
1063c503b24SThierry Herbelot uint32_t i;
10786388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
10886388a3aSMaxime Coquelin
1093c503b24SThierry Herbelot for (i = 0; i < dev->max_queue_pairs; ++i) {
1103c503b24SThierry Herbelot if (data->vhostfds[i] < 0)
1113c503b24SThierry Herbelot continue;
1123c503b24SThierry Herbelot
1133c503b24SThierry Herbelot ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_OWNER, NULL);
1143c503b24SThierry Herbelot if (ret < 0)
1153c503b24SThierry Herbelot return ret;
1163c503b24SThierry Herbelot }
1173c503b24SThierry Herbelot
1183c503b24SThierry Herbelot return 0;
11906856cabSMaxime Coquelin }
12006856cabSMaxime Coquelin
121cc0151b3SMaxime Coquelin static int
vhost_kernel_get_features(struct virtio_user_dev * dev,uint64_t * features)122cc0151b3SMaxime Coquelin vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
123cc0151b3SMaxime Coquelin {
12486388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
125b72099beSDavid Marchand unsigned int tap_flags;
126b72099beSDavid Marchand int ret;
127cc0151b3SMaxime Coquelin
12886388a3aSMaxime Coquelin ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features);
129cc0151b3SMaxime Coquelin if (ret < 0) {
130cc0151b3SMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to get features");
131cc0151b3SMaxime Coquelin return -1;
132cc0151b3SMaxime Coquelin }
133cc0151b3SMaxime Coquelin
134b72099beSDavid Marchand ret = tap_get_flags(data->tapfds[0], &tap_flags);
135cc0151b3SMaxime Coquelin if (ret < 0) {
136cc0151b3SMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to get TAP features");
137cc0151b3SMaxime Coquelin return -1;
138cc0151b3SMaxime Coquelin }
139cc0151b3SMaxime Coquelin
140cc0151b3SMaxime Coquelin /* with tap as the backend, all these features are supported
141cc0151b3SMaxime Coquelin * but not claimed by vhost-net, so we add them back when
142cc0151b3SMaxime Coquelin * reporting to upper layer.
143cc0151b3SMaxime Coquelin */
144b72099beSDavid Marchand if (tap_flags & IFF_VNET_HDR) {
145cc0151b3SMaxime Coquelin *features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
146cc0151b3SMaxime Coquelin *features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
147cc0151b3SMaxime Coquelin }
148cc0151b3SMaxime Coquelin
149cc0151b3SMaxime Coquelin /* vhost_kernel will not declare this feature, but it does
150cc0151b3SMaxime Coquelin * support multi-queue.
151cc0151b3SMaxime Coquelin */
152b72099beSDavid Marchand if (tap_flags & IFF_MULTI_QUEUE)
153cc0151b3SMaxime Coquelin *features |= (1ull << VIRTIO_NET_F_MQ);
154cc0151b3SMaxime Coquelin
155cc0151b3SMaxime Coquelin return 0;
156cc0151b3SMaxime Coquelin }
157cc0151b3SMaxime Coquelin
158cc0151b3SMaxime Coquelin static int
vhost_kernel_set_features(struct virtio_user_dev * dev,uint64_t features)159cc0151b3SMaxime Coquelin vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
160cc0151b3SMaxime Coquelin {
16186388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
162cf412ff7SThierry Herbelot uint32_t i;
163cf412ff7SThierry Herbelot int ret;
16486388a3aSMaxime Coquelin
165cc0151b3SMaxime Coquelin /* We don't need memory protection here */
166cc0151b3SMaxime Coquelin features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
167cc0151b3SMaxime Coquelin /* VHOST kernel does not know about below flags */
168cc0151b3SMaxime Coquelin features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
169cc0151b3SMaxime Coquelin features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
170cc0151b3SMaxime Coquelin features &= ~(1ULL << VIRTIO_NET_F_MQ);
171cc0151b3SMaxime Coquelin
172cf412ff7SThierry Herbelot for (i = 0; i < dev->max_queue_pairs; ++i) {
173cf412ff7SThierry Herbelot if (data->vhostfds[i] < 0)
174cf412ff7SThierry Herbelot continue;
175cf412ff7SThierry Herbelot
176cf412ff7SThierry Herbelot ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_FEATURES, &features);
177cf412ff7SThierry Herbelot if (ret < 0)
178cf412ff7SThierry Herbelot return ret;
179cf412ff7SThierry Herbelot }
180cf412ff7SThierry Herbelot
181cf412ff7SThierry Herbelot return 0;
182cc0151b3SMaxime Coquelin }
183cc0151b3SMaxime Coquelin
184746c346dSAnatoly Burakov static int
add_memseg_list(const struct rte_memseg_list * msl,void * arg)18575728654STiwei Bie add_memseg_list(const struct rte_memseg_list *msl, void *arg)
186746c346dSAnatoly Burakov {
18775728654STiwei Bie struct vhost_memory_kernel *vm = arg;
188746c346dSAnatoly Burakov struct vhost_memory_region *mr;
189746c346dSAnatoly Burakov void *start_addr;
19075728654STiwei Bie uint64_t len;
191746c346dSAnatoly Burakov
1925282bb1cSAnatoly Burakov if (msl->external)
1935282bb1cSAnatoly Burakov return 0;
1945282bb1cSAnatoly Burakov
19575728654STiwei Bie if (vm->nregions >= max_regions)
196746c346dSAnatoly Burakov return -1;
197746c346dSAnatoly Burakov
19875728654STiwei Bie start_addr = msl->base_va;
19975728654STiwei Bie len = msl->page_sz * msl->memseg_arr.len;
20075728654STiwei Bie
20175728654STiwei Bie mr = &vm->regions[vm->nregions++];
202746c346dSAnatoly Burakov
203746c346dSAnatoly Burakov mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
204746c346dSAnatoly Burakov mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
205746c346dSAnatoly Burakov mr->memory_size = len;
20675728654STiwei Bie mr->mmap_offset = 0; /* flags_padding */
20775728654STiwei Bie
20875728654STiwei Bie PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
20975728654STiwei Bie vm->nregions - 1, start_addr, len);
210746c346dSAnatoly Burakov
211746c346dSAnatoly Burakov return 0;
212746c346dSAnatoly Burakov }
213746c346dSAnatoly Burakov
21475728654STiwei Bie /* By default, vhost kernel module allows 64 regions, but DPDK may
21575728654STiwei Bie * have much more memory regions. Below function will treat each
21675728654STiwei Bie * contiguous memory space reserved by DPDK as one region.
217e3b43481SJianfeng Tan */
218539d910cSMaxime Coquelin static int
vhost_kernel_set_memory_table(struct virtio_user_dev * dev)219539d910cSMaxime Coquelin vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
220e3b43481SJianfeng Tan {
221b01e1e9aSThierry Herbelot uint32_t i;
22286388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
223e3b43481SJianfeng Tan struct vhost_memory_kernel *vm;
224539d910cSMaxime Coquelin int ret;
225e3b43481SJianfeng Tan
226e3b43481SJianfeng Tan vm = malloc(sizeof(struct vhost_memory_kernel) +
227e3b43481SJianfeng Tan max_regions *
228e3b43481SJianfeng Tan sizeof(struct vhost_memory_region));
2291e9057a9SJianfeng Tan if (!vm)
230539d910cSMaxime Coquelin goto err;
231e3b43481SJianfeng Tan
23275728654STiwei Bie vm->nregions = 0;
23375728654STiwei Bie vm->padding = 0;
234e3b43481SJianfeng Tan
2357ff26957STiwei Bie /*
2367ff26957STiwei Bie * The memory lock has already been taken by memory subsystem
2377ff26957STiwei Bie * or virtio_user_start_device().
2387ff26957STiwei Bie */
239539d910cSMaxime Coquelin ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
240539d910cSMaxime Coquelin if (ret < 0)
241539d910cSMaxime Coquelin goto err_free;
242539d910cSMaxime Coquelin
243b01e1e9aSThierry Herbelot for (i = 0; i < dev->max_queue_pairs; ++i) {
244b01e1e9aSThierry Herbelot if (data->vhostfds[i] < 0)
245b01e1e9aSThierry Herbelot continue;
246b01e1e9aSThierry Herbelot
247b01e1e9aSThierry Herbelot ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_MEM_TABLE, vm);
248539d910cSMaxime Coquelin if (ret < 0)
249539d910cSMaxime Coquelin goto err_free;
250b01e1e9aSThierry Herbelot }
251539d910cSMaxime Coquelin
252e3b43481SJianfeng Tan free(vm);
253539d910cSMaxime Coquelin
254539d910cSMaxime Coquelin return 0;
255539d910cSMaxime Coquelin err_free:
256539d910cSMaxime Coquelin free(vm);
257539d910cSMaxime Coquelin err:
258539d910cSMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to set memory table");
259539d910cSMaxime Coquelin return -1;
260e3b43481SJianfeng Tan }
261e3b43481SJianfeng Tan
262ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring(struct virtio_user_dev * dev,uint64_t req,struct vhost_vring_state * state)263ab9098d2SMaxime Coquelin vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state)
264ab9098d2SMaxime Coquelin {
265ab9098d2SMaxime Coquelin int ret, fd;
266ab9098d2SMaxime Coquelin unsigned int index = state->index;
26786388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
268ab9098d2SMaxime Coquelin
269ab9098d2SMaxime Coquelin /* Convert from queue index to queue-pair & offset */
27086388a3aSMaxime Coquelin fd = data->vhostfds[state->index / 2];
271ab9098d2SMaxime Coquelin state->index %= 2;
272ab9098d2SMaxime Coquelin
273ab9098d2SMaxime Coquelin ret = vhost_kernel_ioctl(fd, req, state);
274ab9098d2SMaxime Coquelin if (ret < 0) {
275ab9098d2SMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req);
276ab9098d2SMaxime Coquelin return -1;
277ab9098d2SMaxime Coquelin }
278ab9098d2SMaxime Coquelin
279ab9098d2SMaxime Coquelin /* restore index back to queue index */
280ab9098d2SMaxime Coquelin state->index = index;
281ab9098d2SMaxime Coquelin
282ab9098d2SMaxime Coquelin return 0;
283ab9098d2SMaxime Coquelin }
284ab9098d2SMaxime Coquelin
285ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring_num(struct virtio_user_dev * dev,struct vhost_vring_state * state)286ab9098d2SMaxime Coquelin vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
287ab9098d2SMaxime Coquelin {
288ab9098d2SMaxime Coquelin return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state);
289ab9098d2SMaxime Coquelin }
290ab9098d2SMaxime Coquelin
291ab9098d2SMaxime Coquelin static int
vhost_kernel_set_vring_base(struct virtio_user_dev * dev,struct vhost_vring_state * state)292ab9098d2SMaxime Coquelin vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
293ab9098d2SMaxime Coquelin {
294ab9098d2SMaxime Coquelin return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state);
295ab9098d2SMaxime Coquelin }
296ab9098d2SMaxime Coquelin
297ab9098d2SMaxime Coquelin static int
vhost_kernel_get_vring_base(struct virtio_user_dev * dev,struct vhost_vring_state * state)298ab9098d2SMaxime Coquelin vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
299ab9098d2SMaxime Coquelin {
300ab9098d2SMaxime Coquelin return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state);
301ab9098d2SMaxime Coquelin }
302ab9098d2SMaxime Coquelin
303ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_file(struct virtio_user_dev * dev,uint64_t req,struct vhost_vring_file * file)304ce399c36SMaxime Coquelin vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req,
305ce399c36SMaxime Coquelin struct vhost_vring_file *file)
306ce399c36SMaxime Coquelin {
307ce399c36SMaxime Coquelin int ret, fd;
308ce399c36SMaxime Coquelin unsigned int index = file->index;
30986388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
310ce399c36SMaxime Coquelin
311ce399c36SMaxime Coquelin /* Convert from queue index to queue-pair & offset */
31286388a3aSMaxime Coquelin fd = data->vhostfds[file->index / 2];
313ce399c36SMaxime Coquelin file->index %= 2;
314ce399c36SMaxime Coquelin
315ce399c36SMaxime Coquelin ret = vhost_kernel_ioctl(fd, req, file);
316ce399c36SMaxime Coquelin if (ret < 0) {
317ce399c36SMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req);
318ce399c36SMaxime Coquelin return -1;
319ce399c36SMaxime Coquelin }
320ce399c36SMaxime Coquelin
321ce399c36SMaxime Coquelin /* restore index back to queue index */
322ce399c36SMaxime Coquelin file->index = index;
323ce399c36SMaxime Coquelin
324ce399c36SMaxime Coquelin return 0;
325ce399c36SMaxime Coquelin }
326ce399c36SMaxime Coquelin
327ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_kick(struct virtio_user_dev * dev,struct vhost_vring_file * file)328ce399c36SMaxime Coquelin vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
329ce399c36SMaxime Coquelin {
330ce399c36SMaxime Coquelin return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file);
331ce399c36SMaxime Coquelin }
332ce399c36SMaxime Coquelin
333ce399c36SMaxime Coquelin static int
vhost_kernel_set_vring_call(struct virtio_user_dev * dev,struct vhost_vring_file * file)334ce399c36SMaxime Coquelin vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
335ce399c36SMaxime Coquelin {
336ce399c36SMaxime Coquelin return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file);
337ce399c36SMaxime Coquelin }
338ce399c36SMaxime Coquelin
339dc65db73SMaxime Coquelin static int
vhost_kernel_set_vring_addr(struct virtio_user_dev * dev,struct vhost_vring_addr * addr)340dc65db73SMaxime Coquelin vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
341dc65db73SMaxime Coquelin {
342dc65db73SMaxime Coquelin int ret, fd;
343dc65db73SMaxime Coquelin unsigned int index = addr->index;
34486388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
345dc65db73SMaxime Coquelin
346dc65db73SMaxime Coquelin /* Convert from queue index to queue-pair & offset */
34786388a3aSMaxime Coquelin fd = data->vhostfds[addr->index / 2];
348dc65db73SMaxime Coquelin addr->index %= 2;
349dc65db73SMaxime Coquelin
350dc65db73SMaxime Coquelin ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr);
351dc65db73SMaxime Coquelin if (ret < 0) {
352dc65db73SMaxime Coquelin PMD_DRV_LOG(ERR, "Failed to set vring address");
353dc65db73SMaxime Coquelin return -1;
354dc65db73SMaxime Coquelin }
355dc65db73SMaxime Coquelin
356dc65db73SMaxime Coquelin /* restore index back to queue index */
357dc65db73SMaxime Coquelin addr->index = index;
358dc65db73SMaxime Coquelin
359dc65db73SMaxime Coquelin return 0;
360dc65db73SMaxime Coquelin }
361dc65db73SMaxime Coquelin
3628723c894SMaxime Coquelin static int
vhost_kernel_get_status(struct virtio_user_dev * dev __rte_unused,uint8_t * status __rte_unused)3638723c894SMaxime Coquelin vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused)
3648723c894SMaxime Coquelin {
3658723c894SMaxime Coquelin return -ENOTSUP;
3668723c894SMaxime Coquelin }
3678723c894SMaxime Coquelin
3688723c894SMaxime Coquelin static int
vhost_kernel_set_status(struct virtio_user_dev * dev __rte_unused,uint8_t status __rte_unused)3698723c894SMaxime Coquelin vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused)
3708723c894SMaxime Coquelin {
3718723c894SMaxime Coquelin return -ENOTSUP;
3728723c894SMaxime Coquelin }
3738723c894SMaxime Coquelin
374e3b43481SJianfeng Tan /**
375e3b43481SJianfeng Tan * Set up environment to talk with a vhost kernel backend.
376e3b43481SJianfeng Tan *
377e3b43481SJianfeng Tan * @return
378e3b43481SJianfeng Tan * - (-1) if fail to set up;
379e3b43481SJianfeng Tan * - (>=0) if successful.
380e3b43481SJianfeng Tan */
381e3b43481SJianfeng Tan static int
vhost_kernel_setup(struct virtio_user_dev * dev)382e3b43481SJianfeng Tan vhost_kernel_setup(struct virtio_user_dev *dev)
383e3b43481SJianfeng Tan {
38486388a3aSMaxime Coquelin struct vhost_kernel_data *data;
385b72099beSDavid Marchand unsigned int tap_features;
386b72099beSDavid Marchand unsigned int tap_flags;
387666ff776SHarold Huang unsigned int r_flags;
388b72099beSDavid Marchand const char *ifname;
389b72099beSDavid Marchand uint32_t q, i;
390b72099beSDavid Marchand int vhostfd;
391b72099beSDavid Marchand
392b72099beSDavid Marchand if (tap_support_features(&tap_features) < 0)
393b72099beSDavid Marchand return -1;
394b72099beSDavid Marchand
395b72099beSDavid Marchand if ((tap_features & IFF_VNET_HDR) == 0) {
396b72099beSDavid Marchand PMD_INIT_LOG(ERR, "TAP does not support IFF_VNET_HDR");
397b72099beSDavid Marchand return -1;
398b72099beSDavid Marchand }
399666ff776SHarold Huang r_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
400666ff776SHarold Huang
401666ff776SHarold Huang if (tap_features & IFF_NAPI)
402666ff776SHarold Huang r_flags |= IFF_NAPI;
40386388a3aSMaxime Coquelin
40486388a3aSMaxime Coquelin data = malloc(sizeof(*data));
40586388a3aSMaxime Coquelin if (!data) {
40686388a3aSMaxime Coquelin PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path);
40786388a3aSMaxime Coquelin return -1;
40886388a3aSMaxime Coquelin }
40986388a3aSMaxime Coquelin
41086388a3aSMaxime Coquelin data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
41186388a3aSMaxime Coquelin if (!data->vhostfds) {
41286388a3aSMaxime Coquelin PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path);
41386388a3aSMaxime Coquelin goto err_data;
41486388a3aSMaxime Coquelin }
41586388a3aSMaxime Coquelin data->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
41686388a3aSMaxime Coquelin if (!data->tapfds) {
41786388a3aSMaxime Coquelin PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path);
41886388a3aSMaxime Coquelin goto err_vhostfds;
41986388a3aSMaxime Coquelin }
42086388a3aSMaxime Coquelin
42186388a3aSMaxime Coquelin for (q = 0; q < dev->max_queue_pairs; ++q) {
42286388a3aSMaxime Coquelin data->vhostfds[q] = -1;
42386388a3aSMaxime Coquelin data->tapfds[q] = -1;
42486388a3aSMaxime Coquelin }
425e3b43481SJianfeng Tan
426e3b43481SJianfeng Tan get_vhost_kernel_max_regions();
427e3b43481SJianfeng Tan
428e3b43481SJianfeng Tan for (i = 0; i < dev->max_queue_pairs; ++i) {
429e3b43481SJianfeng Tan vhostfd = open(dev->path, O_RDWR);
430e3b43481SJianfeng Tan if (vhostfd < 0) {
43186388a3aSMaxime Coquelin PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno));
43286388a3aSMaxime Coquelin goto err_tapfds;
43386388a3aSMaxime Coquelin }
43486388a3aSMaxime Coquelin data->vhostfds[i] = vhostfd;
43586388a3aSMaxime Coquelin }
43686388a3aSMaxime Coquelin
437b72099beSDavid Marchand ifname = dev->ifname != NULL ? dev->ifname : "tap%d";
438666ff776SHarold Huang data->tapfds[0] = tap_open(ifname, r_flags, (tap_features & IFF_MULTI_QUEUE) != 0);
439b72099beSDavid Marchand if (data->tapfds[0] < 0)
440b72099beSDavid Marchand goto err_tapfds;
441b72099beSDavid Marchand if (dev->ifname == NULL && tap_get_name(data->tapfds[0], &dev->ifname) < 0) {
442b72099beSDavid Marchand PMD_DRV_LOG(ERR, "fail to get tap name (%d)", data->tapfds[0]);
443b72099beSDavid Marchand goto err_tapfds;
444b72099beSDavid Marchand }
445b72099beSDavid Marchand if (tap_get_flags(data->tapfds[0], &tap_flags) < 0) {
446b72099beSDavid Marchand PMD_DRV_LOG(ERR, "fail to get tap flags for tap %s", dev->ifname);
447b72099beSDavid Marchand goto err_tapfds;
448b72099beSDavid Marchand }
449b72099beSDavid Marchand if ((tap_flags & IFF_MULTI_QUEUE) == 0 && dev->max_queue_pairs > 1) {
450b72099beSDavid Marchand PMD_DRV_LOG(ERR, "tap %s does not support multi queue", dev->ifname);
451b72099beSDavid Marchand goto err_tapfds;
452b72099beSDavid Marchand }
453b72099beSDavid Marchand
454b72099beSDavid Marchand for (i = 1; i < dev->max_queue_pairs; i++) {
455666ff776SHarold Huang data->tapfds[i] = tap_open(dev->ifname, r_flags, true);
456b72099beSDavid Marchand if (data->tapfds[i] < 0)
457b72099beSDavid Marchand goto err_tapfds;
458b72099beSDavid Marchand }
459b72099beSDavid Marchand
46086388a3aSMaxime Coquelin dev->backend_data = data;
46186388a3aSMaxime Coquelin
46286388a3aSMaxime Coquelin return 0;
46386388a3aSMaxime Coquelin
46486388a3aSMaxime Coquelin err_tapfds:
465b72099beSDavid Marchand for (i = 0; i < dev->max_queue_pairs; i++) {
46686388a3aSMaxime Coquelin if (data->vhostfds[i] >= 0)
46786388a3aSMaxime Coquelin close(data->vhostfds[i]);
468b72099beSDavid Marchand if (data->tapfds[i] >= 0)
469b72099beSDavid Marchand close(data->tapfds[i]);
470b72099beSDavid Marchand }
47186388a3aSMaxime Coquelin
47286388a3aSMaxime Coquelin free(data->tapfds);
47386388a3aSMaxime Coquelin err_vhostfds:
47486388a3aSMaxime Coquelin free(data->vhostfds);
47586388a3aSMaxime Coquelin err_data:
47686388a3aSMaxime Coquelin free(data);
47786388a3aSMaxime Coquelin
478e3b43481SJianfeng Tan return -1;
479e3b43481SJianfeng Tan }
480e3b43481SJianfeng Tan
481e3b43481SJianfeng Tan static int
vhost_kernel_destroy(struct virtio_user_dev * dev)48286388a3aSMaxime Coquelin vhost_kernel_destroy(struct virtio_user_dev *dev)
483748e5ea5SMaxime Coquelin {
48486388a3aSMaxime Coquelin struct vhost_kernel_data *data = dev->backend_data;
48586388a3aSMaxime Coquelin uint32_t i;
48686388a3aSMaxime Coquelin
48786388a3aSMaxime Coquelin if (!data)
48886388a3aSMaxime Coquelin return 0;
48986388a3aSMaxime Coquelin
49086388a3aSMaxime Coquelin for (i = 0; i < dev->max_queue_pairs; ++i) {
49186388a3aSMaxime Coquelin if (data->vhostfds[i] >= 0)
49286388a3aSMaxime Coquelin close(data->vhostfds[i]);
49386388a3aSMaxime Coquelin if (data->tapfds[i] >= 0)
49486388a3aSMaxime Coquelin close(data->tapfds[i]);
49586388a3aSMaxime Coquelin }
49686388a3aSMaxime Coquelin
49786388a3aSMaxime Coquelin free(data->vhostfds);
49886388a3aSMaxime Coquelin free(data->tapfds);
49986388a3aSMaxime Coquelin free(data);
50086388a3aSMaxime Coquelin dev->backend_data = NULL;
50186388a3aSMaxime Coquelin
502748e5ea5SMaxime Coquelin return 0;
503748e5ea5SMaxime Coquelin }
504748e5ea5SMaxime Coquelin
505748e5ea5SMaxime Coquelin static int
vhost_kernel_set_backend(int vhostfd,int tapfd)506e3b43481SJianfeng Tan vhost_kernel_set_backend(int vhostfd, int tapfd)
507e3b43481SJianfeng Tan {
508e3b43481SJianfeng Tan struct vhost_vring_file f;
509e3b43481SJianfeng Tan
510e3b43481SJianfeng Tan f.fd = tapfd;
511e3b43481SJianfeng Tan f.index = 0;
512e3b43481SJianfeng Tan if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
513e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
514e3b43481SJianfeng Tan strerror(errno));
515e3b43481SJianfeng Tan return -1;
516e3b43481SJianfeng Tan }
517e3b43481SJianfeng Tan
518e3b43481SJianfeng Tan f.index = 1;
519e3b43481SJianfeng Tan if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
520e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
521e3b43481SJianfeng Tan strerror(errno));
522e3b43481SJianfeng Tan return -1;
523e3b43481SJianfeng Tan }
524e3b43481SJianfeng Tan
525e3b43481SJianfeng Tan return 0;
526e3b43481SJianfeng Tan }
527e3b43481SJianfeng Tan
528e3b43481SJianfeng Tan static int
vhost_kernel_enable_queue_pair(struct virtio_user_dev * dev,uint16_t pair_idx,int enable)529e3b43481SJianfeng Tan vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
530e3b43481SJianfeng Tan uint16_t pair_idx,
531e3b43481SJianfeng Tan int enable)
532e3b43481SJianfeng Tan {
533b72099beSDavid Marchand struct vhost_kernel_data *data = dev->backend_data;
534e3b43481SJianfeng Tan int hdr_size;
535e3b43481SJianfeng Tan int vhostfd;
536e3b43481SJianfeng Tan int tapfd;
537e3b43481SJianfeng Tan
53847ac9661STiwei Bie if (dev->qp_enabled[pair_idx] == enable)
539e3b43481SJianfeng Tan return 0;
54047ac9661STiwei Bie
541b72099beSDavid Marchand vhostfd = data->vhostfds[pair_idx];
54286388a3aSMaxime Coquelin tapfd = data->tapfds[pair_idx];
543b72099beSDavid Marchand
544b72099beSDavid Marchand if (!enable) {
54547ac9661STiwei Bie if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
54647ac9661STiwei Bie PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
54747ac9661STiwei Bie return -1;
54847ac9661STiwei Bie }
54947ac9661STiwei Bie dev->qp_enabled[pair_idx] = false;
55047ac9661STiwei Bie return 0;
55147ac9661STiwei Bie }
55247ac9661STiwei Bie
553e3b43481SJianfeng Tan if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
554e3b43481SJianfeng Tan (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
555e3b43481SJianfeng Tan hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
556e3b43481SJianfeng Tan else
557e3b43481SJianfeng Tan hdr_size = sizeof(struct virtio_net_hdr);
558e3b43481SJianfeng Tan
559b72099beSDavid Marchand /* Set mac on tap only once when starting */
560b72099beSDavid Marchand if (!dev->started && pair_idx == 0 &&
561b72099beSDavid Marchand tap_set_mac(data->tapfds[pair_idx], dev->mac_addr) < 0)
562b72099beSDavid Marchand return -1;
563b72099beSDavid Marchand
564b72099beSDavid Marchand if (vhost_kernel_tap_setup(tapfd, hdr_size, dev->features) < 0) {
565b72099beSDavid Marchand PMD_DRV_LOG(ERR, "fail to setup tap for vhost kernel");
566e3b43481SJianfeng Tan return -1;
567e3b43481SJianfeng Tan }
568e3b43481SJianfeng Tan
569e3b43481SJianfeng Tan if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
570e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
571e3b43481SJianfeng Tan return -1;
572e3b43481SJianfeng Tan }
573e3b43481SJianfeng Tan
57447ac9661STiwei Bie dev->qp_enabled[pair_idx] = true;
575e3b43481SJianfeng Tan return 0;
576e3b43481SJianfeng Tan }
577e3b43481SJianfeng Tan
/*
 * Backend op: report the backend-specific feature bits. This backend has
 * none, so *features is cleared. Always returns 0.
 */
static int
vhost_kernel_get_backend_features(uint64_t *features)
{
	const uint64_t none = 0;

	*features = none;
	return 0;
}
5855b75b63cSMaxime Coquelin
58694973531SMaxime Coquelin static int
vhost_kernel_update_link_state(struct virtio_user_dev * dev __rte_unused)58794973531SMaxime Coquelin vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused)
58894973531SMaxime Coquelin {
58994973531SMaxime Coquelin /* Nothing to update (Maybe get TAP interface link state?) */
59094973531SMaxime Coquelin return 0;
59194973531SMaxime Coquelin }
59294973531SMaxime Coquelin
59394973531SMaxime Coquelin static int
vhost_kernel_get_intr_fd(struct virtio_user_dev * dev __rte_unused)59494973531SMaxime Coquelin vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused)
59594973531SMaxime Coquelin {
59694973531SMaxime Coquelin /* No link state interrupt with Vhost-kernel */
59794973531SMaxime Coquelin return -1;
59894973531SMaxime Coquelin }
59994973531SMaxime Coquelin
600520dd992SFerruh Yigit struct virtio_user_backend_ops virtio_ops_kernel = {
601e3b43481SJianfeng Tan .setup = vhost_kernel_setup,
602748e5ea5SMaxime Coquelin .destroy = vhost_kernel_destroy,
6035b75b63cSMaxime Coquelin .get_backend_features = vhost_kernel_get_backend_features,
60406856cabSMaxime Coquelin .set_owner = vhost_kernel_set_owner,
605cc0151b3SMaxime Coquelin .get_features = vhost_kernel_get_features,
606cc0151b3SMaxime Coquelin .set_features = vhost_kernel_set_features,
607539d910cSMaxime Coquelin .set_memory_table = vhost_kernel_set_memory_table,
608ab9098d2SMaxime Coquelin .set_vring_num = vhost_kernel_set_vring_num,
609ab9098d2SMaxime Coquelin .set_vring_base = vhost_kernel_set_vring_base,
610ab9098d2SMaxime Coquelin .get_vring_base = vhost_kernel_get_vring_base,
611ce399c36SMaxime Coquelin .set_vring_call = vhost_kernel_set_vring_call,
612ce399c36SMaxime Coquelin .set_vring_kick = vhost_kernel_set_vring_kick,
613dc65db73SMaxime Coquelin .set_vring_addr = vhost_kernel_set_vring_addr,
6148723c894SMaxime Coquelin .get_status = vhost_kernel_get_status,
6158723c894SMaxime Coquelin .set_status = vhost_kernel_set_status,
61694973531SMaxime Coquelin .enable_qp = vhost_kernel_enable_queue_pair,
61794973531SMaxime Coquelin .update_link_state = vhost_kernel_update_link_state,
61894973531SMaxime Coquelin .get_intr_fd = vhost_kernel_get_intr_fd,
619e3b43481SJianfeng Tan };
620