1e3b43481SJianfeng Tan /*- 2e3b43481SJianfeng Tan * BSD LICENSE 3e3b43481SJianfeng Tan * 4e3b43481SJianfeng Tan * Copyright(c) 2016 Intel Corporation. All rights reserved. 5e3b43481SJianfeng Tan * All rights reserved. 6e3b43481SJianfeng Tan * 7e3b43481SJianfeng Tan * Redistribution and use in source and binary forms, with or without 8e3b43481SJianfeng Tan * modification, are permitted provided that the following conditions 9e3b43481SJianfeng Tan * are met: 10e3b43481SJianfeng Tan * 11e3b43481SJianfeng Tan * * Redistributions of source code must retain the above copyright 12e3b43481SJianfeng Tan * notice, this list of conditions and the following disclaimer. 13e3b43481SJianfeng Tan * * Redistributions in binary form must reproduce the above copyright 14e3b43481SJianfeng Tan * notice, this list of conditions and the following disclaimer in 15e3b43481SJianfeng Tan * the documentation and/or other materials provided with the 16e3b43481SJianfeng Tan * distribution. 17e3b43481SJianfeng Tan * * Neither the name of Intel Corporation nor the names of its 18e3b43481SJianfeng Tan * contributors may be used to endorse or promote products derived 19e3b43481SJianfeng Tan * from this software without specific prior written permission. 20e3b43481SJianfeng Tan * 21e3b43481SJianfeng Tan * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22e3b43481SJianfeng Tan * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23e3b43481SJianfeng Tan * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24e3b43481SJianfeng Tan * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25e3b43481SJianfeng Tan * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26e3b43481SJianfeng Tan * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27e3b43481SJianfeng Tan * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28e3b43481SJianfeng Tan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29e3b43481SJianfeng Tan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30e3b43481SJianfeng Tan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31e3b43481SJianfeng Tan * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32e3b43481SJianfeng Tan */ 33e3b43481SJianfeng Tan 34e3b43481SJianfeng Tan #include <sys/types.h> 35e3b43481SJianfeng Tan #include <sys/stat.h> 36e3b43481SJianfeng Tan #include <fcntl.h> 37e3b43481SJianfeng Tan #include <unistd.h> 38e3b43481SJianfeng Tan 39e3b43481SJianfeng Tan #include <rte_memory.h> 40e3b43481SJianfeng Tan #include <rte_eal_memconfig.h> 41e3b43481SJianfeng Tan 42e3b43481SJianfeng Tan #include "vhost.h" 43e3b43481SJianfeng Tan #include "virtio_user_dev.h" 44e3b43481SJianfeng Tan #include "vhost_kernel_tap.h" 45e3b43481SJianfeng Tan 46e3b43481SJianfeng Tan struct vhost_memory_kernel { 47e3b43481SJianfeng Tan uint32_t nregions; 48e3b43481SJianfeng Tan uint32_t padding; 49e3b43481SJianfeng Tan struct vhost_memory_region regions[0]; 50e3b43481SJianfeng Tan }; 51e3b43481SJianfeng Tan 52e3b43481SJianfeng Tan /* vhost kernel ioctls */ 53e3b43481SJianfeng Tan #define VHOST_VIRTIO 0xAF 54e3b43481SJianfeng Tan #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 55e3b43481SJianfeng Tan #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 56e3b43481SJianfeng Tan #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 57e3b43481SJianfeng Tan #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 58e3b43481SJianfeng Tan #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 59e3b43481SJianfeng Tan #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 60e3b43481SJianfeng Tan #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 61e3b43481SJianfeng Tan #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 62e3b43481SJianfeng Tan #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 63e3b43481SJianfeng Tan #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 64e3b43481SJianfeng Tan #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 65e3b43481SJianfeng Tan #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 66e3b43481SJianfeng Tan #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 67e3b43481SJianfeng Tan #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 68e3b43481SJianfeng Tan #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 69e3b43481SJianfeng Tan 70e3b43481SJianfeng Tan static uint64_t max_regions = 64; 71e3b43481SJianfeng Tan 72e3b43481SJianfeng Tan static void 73e3b43481SJianfeng Tan get_vhost_kernel_max_regions(void) 74e3b43481SJianfeng Tan { 75e3b43481SJianfeng Tan int fd; 76e3b43481SJianfeng Tan char buf[20] = {'\0'}; 77e3b43481SJianfeng Tan 78e3b43481SJianfeng Tan fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 79e3b43481SJianfeng Tan if (fd < 0) 80e3b43481SJianfeng Tan return; 81e3b43481SJianfeng Tan 82e3b43481SJianfeng Tan if (read(fd, buf, sizeof(buf) - 1) > 0) 83e3b43481SJianfeng Tan max_regions = strtoull(buf, NULL, 10); 84e3b43481SJianfeng Tan 85e3b43481SJianfeng Tan close(fd); 86e3b43481SJianfeng Tan } 87e3b43481SJianfeng Tan 88e3b43481SJianfeng Tan static uint64_t vhost_req_user_to_kernel[] = { 89e3b43481SJianfeng Tan [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 90e3b43481SJianfeng Tan [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 91e3b43481SJianfeng Tan [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 92e3b43481SJianfeng Tan [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 93e3b43481SJianfeng Tan [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 94e3b43481SJianfeng Tan [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 95e3b43481SJianfeng Tan [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 96e3b43481SJianfeng Tan [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 97e3b43481SJianfeng Tan [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 98e3b43481SJianfeng Tan [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 99e3b43481SJianfeng Tan [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 100e3b43481SJianfeng Tan }; 101e3b43481SJianfeng Tan 102e3b43481SJianfeng Tan /* By default, vhost kernel module allows 64 regions, but DPDK allows 103e3b43481SJianfeng Tan * 256 segments. As a relief, below function merges those virtually 104e3b43481SJianfeng Tan * adjacent memsegs into one region. 105e3b43481SJianfeng Tan */ 106e3b43481SJianfeng Tan static struct vhost_memory_kernel * 107e3b43481SJianfeng Tan prepare_vhost_memory_kernel(void) 108e3b43481SJianfeng Tan { 109e3b43481SJianfeng Tan uint32_t i, j, k = 0; 110e3b43481SJianfeng Tan struct rte_memseg *seg; 111e3b43481SJianfeng Tan struct vhost_memory_region *mr; 112e3b43481SJianfeng Tan struct vhost_memory_kernel *vm; 113e3b43481SJianfeng Tan 114e3b43481SJianfeng Tan vm = malloc(sizeof(struct vhost_memory_kernel) + 115e3b43481SJianfeng Tan max_regions * 116e3b43481SJianfeng Tan sizeof(struct vhost_memory_region)); 117*1e9057a9SJianfeng Tan if (!vm) 118*1e9057a9SJianfeng Tan return NULL; 119e3b43481SJianfeng Tan 120e3b43481SJianfeng Tan for (i = 0; i < RTE_MAX_MEMSEG; ++i) { 121e3b43481SJianfeng Tan seg = &rte_eal_get_configuration()->mem_config->memseg[i]; 122e3b43481SJianfeng Tan if (!seg->addr) 123e3b43481SJianfeng Tan break; 124e3b43481SJianfeng Tan 125e3b43481SJianfeng Tan int new_region = 1; 126e3b43481SJianfeng Tan 127e3b43481SJianfeng Tan for (j = 0; j < k; ++j) { 128e3b43481SJianfeng Tan mr = &vm->regions[j]; 129e3b43481SJianfeng Tan 130e3b43481SJianfeng Tan if (mr->userspace_addr + mr->memory_size == 131e3b43481SJianfeng Tan (uint64_t)(uintptr_t)seg->addr) { 132e3b43481SJianfeng Tan mr->memory_size += seg->len; 133e3b43481SJianfeng Tan new_region = 0; 134e3b43481SJianfeng Tan break; 135e3b43481SJianfeng Tan } 136e3b43481SJianfeng Tan 137e3b43481SJianfeng Tan if ((uint64_t)(uintptr_t)seg->addr + seg->len == 138e3b43481SJianfeng Tan mr->userspace_addr) { 139e3b43481SJianfeng Tan mr->guest_phys_addr = 140e3b43481SJianfeng Tan (uint64_t)(uintptr_t)seg->addr; 141e3b43481SJianfeng Tan mr->userspace_addr = 142e3b43481SJianfeng Tan (uint64_t)(uintptr_t)seg->addr; 143e3b43481SJianfeng Tan mr->memory_size += seg->len; 144e3b43481SJianfeng Tan new_region = 0; 145e3b43481SJianfeng Tan break; 146e3b43481SJianfeng Tan } 147e3b43481SJianfeng Tan } 148e3b43481SJianfeng Tan 149e3b43481SJianfeng Tan if (new_region == 0) 150e3b43481SJianfeng Tan continue; 151e3b43481SJianfeng Tan 152e3b43481SJianfeng Tan mr = &vm->regions[k++]; 153e3b43481SJianfeng Tan /* use vaddr here! */ 154e3b43481SJianfeng Tan mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; 155e3b43481SJianfeng Tan mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; 156e3b43481SJianfeng Tan mr->memory_size = seg->len; 157e3b43481SJianfeng Tan mr->mmap_offset = 0; 158e3b43481SJianfeng Tan 159e3b43481SJianfeng Tan if (k >= max_regions) { 160e3b43481SJianfeng Tan free(vm); 161e3b43481SJianfeng Tan return NULL; 162e3b43481SJianfeng Tan } 163e3b43481SJianfeng Tan } 164e3b43481SJianfeng Tan 165e3b43481SJianfeng Tan vm->nregions = k; 166e3b43481SJianfeng Tan vm->padding = 0; 167e3b43481SJianfeng Tan return vm; 168e3b43481SJianfeng Tan } 169e3b43481SJianfeng Tan 1705e97e420SJianfeng Tan /* with below features, vhost kernel does not need to do the checksum and TSO, 1715e97e420SJianfeng Tan * these info will be passed to virtio_user through virtio net header. 1725e97e420SJianfeng Tan */ 1735e97e420SJianfeng Tan #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 1745e97e420SJianfeng Tan ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 1755e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 1765e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 1775e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 1785e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_GUEST_UFO)) 1795e97e420SJianfeng Tan 1805e97e420SJianfeng Tan /* with below features, when flows from virtio_user to vhost kernel 1815e97e420SJianfeng Tan * (1) if flows goes up through the kernel networking stack, it does not need 1825e97e420SJianfeng Tan * to verify checksum, which can save CPU cycles; 1835e97e420SJianfeng Tan * (2) if flows goes through a Linux bridge and outside from an interface 1845e97e420SJianfeng Tan * (kernel driver), checksum and TSO will be done by GSO in kernel or even 1855e97e420SJianfeng Tan * offloaded into real physical device. 1865e97e420SJianfeng Tan */ 1875e97e420SJianfeng Tan #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 1885e97e420SJianfeng Tan ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 1895e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 1905e97e420SJianfeng Tan (1ULL << VIRTIO_NET_F_CSUM)) 1915e97e420SJianfeng Tan 192e3b43481SJianfeng Tan static int 193be7a4707SJianfeng Tan tap_supporte_mq(void) 194be7a4707SJianfeng Tan { 195be7a4707SJianfeng Tan int tapfd; 196be7a4707SJianfeng Tan unsigned int tap_features; 197be7a4707SJianfeng Tan 198be7a4707SJianfeng Tan tapfd = open(PATH_NET_TUN, O_RDWR); 199be7a4707SJianfeng Tan if (tapfd < 0) { 200be7a4707SJianfeng Tan PMD_DRV_LOG(ERR, "fail to open %s: %s", 201be7a4707SJianfeng Tan PATH_NET_TUN, strerror(errno)); 202be7a4707SJianfeng Tan return -1; 203be7a4707SJianfeng Tan } 204be7a4707SJianfeng Tan 205be7a4707SJianfeng Tan if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 206be7a4707SJianfeng Tan PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 207be7a4707SJianfeng Tan close(tapfd); 208be7a4707SJianfeng Tan return -1; 209be7a4707SJianfeng Tan } 210be7a4707SJianfeng Tan 211be7a4707SJianfeng Tan close(tapfd); 212be7a4707SJianfeng Tan return tap_features & IFF_MULTI_QUEUE; 213be7a4707SJianfeng Tan } 214be7a4707SJianfeng Tan 215be7a4707SJianfeng Tan static int 216e3b43481SJianfeng Tan vhost_kernel_ioctl(struct virtio_user_dev *dev, 217e3b43481SJianfeng Tan enum vhost_user_request req, 218e3b43481SJianfeng Tan void *arg) 219e3b43481SJianfeng Tan { 220e3b43481SJianfeng Tan int ret = -1; 221e3b43481SJianfeng Tan unsigned int i; 222e3b43481SJianfeng Tan uint64_t req_kernel; 223e3b43481SJianfeng Tan struct vhost_memory_kernel *vm = NULL; 224be7a4707SJianfeng Tan int vhostfd; 225be7a4707SJianfeng Tan unsigned int queue_sel; 226e3b43481SJianfeng Tan 227e3b43481SJianfeng Tan PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 228e3b43481SJianfeng Tan 229e3b43481SJianfeng Tan req_kernel = vhost_req_user_to_kernel[req]; 230e3b43481SJianfeng Tan 231e3b43481SJianfeng Tan if (req_kernel == VHOST_SET_MEM_TABLE) { 232e3b43481SJianfeng Tan vm = prepare_vhost_memory_kernel(); 233e3b43481SJianfeng Tan if (!vm) 234e3b43481SJianfeng Tan return -1; 235e3b43481SJianfeng Tan arg = (void *)vm; 236e3b43481SJianfeng Tan } 237e3b43481SJianfeng Tan 2385e97e420SJianfeng Tan if (req_kernel == VHOST_SET_FEATURES) { 239e3b43481SJianfeng Tan /* We don't need memory protection here */ 240e3b43481SJianfeng Tan *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 241e3b43481SJianfeng Tan 2425e97e420SJianfeng Tan /* VHOST kernel does not know about below flags */ 2435e97e420SJianfeng Tan *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 2445e97e420SJianfeng Tan *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 245be7a4707SJianfeng Tan 246be7a4707SJianfeng Tan *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 2475e97e420SJianfeng Tan } 2485e97e420SJianfeng Tan 249be7a4707SJianfeng Tan switch (req_kernel) { 250be7a4707SJianfeng Tan case VHOST_SET_VRING_NUM: 251be7a4707SJianfeng Tan case VHOST_SET_VRING_ADDR: 252be7a4707SJianfeng Tan case VHOST_SET_VRING_BASE: 253be7a4707SJianfeng Tan case VHOST_GET_VRING_BASE: 254be7a4707SJianfeng Tan case VHOST_SET_VRING_KICK: 255be7a4707SJianfeng Tan case VHOST_SET_VRING_CALL: 256be7a4707SJianfeng Tan queue_sel = *(unsigned int *)arg; 257be7a4707SJianfeng Tan vhostfd = dev->vhostfds[queue_sel / 2]; 258be7a4707SJianfeng Tan *(unsigned int *)arg = queue_sel % 2; 259be7a4707SJianfeng Tan PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 260be7a4707SJianfeng Tan vhostfd, *(unsigned int *)arg); 261be7a4707SJianfeng Tan break; 262be7a4707SJianfeng Tan default: 263be7a4707SJianfeng Tan vhostfd = -1; 264be7a4707SJianfeng Tan } 265be7a4707SJianfeng Tan if (vhostfd == -1) { 266e3b43481SJianfeng Tan for (i = 0; i < dev->max_queue_pairs; ++i) { 267e3b43481SJianfeng Tan if (dev->vhostfds[i] < 0) 268e3b43481SJianfeng Tan continue; 269e3b43481SJianfeng Tan 270e3b43481SJianfeng Tan ret = ioctl(dev->vhostfds[i], req_kernel, arg); 271e3b43481SJianfeng Tan if (ret < 0) 272e3b43481SJianfeng Tan break; 273e3b43481SJianfeng Tan } 274be7a4707SJianfeng Tan } else { 275be7a4707SJianfeng Tan ret = ioctl(vhostfd, req_kernel, arg); 276be7a4707SJianfeng Tan } 277e3b43481SJianfeng Tan 2785e97e420SJianfeng Tan if (!ret && req_kernel == VHOST_GET_FEATURES) { 2795e97e420SJianfeng Tan /* with tap as the backend, all these features are supported 2805e97e420SJianfeng Tan * but not claimed by vhost-net, so we add them back when 2815e97e420SJianfeng Tan * reporting to upper layer. 2825e97e420SJianfeng Tan */ 2835e97e420SJianfeng Tan *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 2845e97e420SJianfeng Tan *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 285be7a4707SJianfeng Tan 286be7a4707SJianfeng Tan /* vhost_kernel will not declare this feature, but it does 287be7a4707SJianfeng Tan * support multi-queue. 288be7a4707SJianfeng Tan */ 289be7a4707SJianfeng Tan if (tap_supporte_mq()) 290be7a4707SJianfeng Tan *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 2915e97e420SJianfeng Tan } 2925e97e420SJianfeng Tan 293e3b43481SJianfeng Tan if (vm) 294e3b43481SJianfeng Tan free(vm); 295e3b43481SJianfeng Tan 296e3b43481SJianfeng Tan if (ret < 0) 297e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "%s failed: %s", 298e3b43481SJianfeng Tan vhost_msg_strings[req], strerror(errno)); 299e3b43481SJianfeng Tan 300e3b43481SJianfeng Tan return ret; 301e3b43481SJianfeng Tan } 302e3b43481SJianfeng Tan 303e3b43481SJianfeng Tan /** 304e3b43481SJianfeng Tan * Set up environment to talk with a vhost kernel backend. 305e3b43481SJianfeng Tan * 306e3b43481SJianfeng Tan * @return 307e3b43481SJianfeng Tan * - (-1) if fail to set up; 308e3b43481SJianfeng Tan * - (>=0) if successful. 309e3b43481SJianfeng Tan */ 310e3b43481SJianfeng Tan static int 311e3b43481SJianfeng Tan vhost_kernel_setup(struct virtio_user_dev *dev) 312e3b43481SJianfeng Tan { 313e3b43481SJianfeng Tan int vhostfd; 314e3b43481SJianfeng Tan uint32_t i; 315e3b43481SJianfeng Tan 316e3b43481SJianfeng Tan get_vhost_kernel_max_regions(); 317e3b43481SJianfeng Tan 318e3b43481SJianfeng Tan for (i = 0; i < dev->max_queue_pairs; ++i) { 319e3b43481SJianfeng Tan vhostfd = open(dev->path, O_RDWR); 320e3b43481SJianfeng Tan if (vhostfd < 0) { 321e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "fail to open %s, %s", 322e3b43481SJianfeng Tan dev->path, strerror(errno)); 323e3b43481SJianfeng Tan return -1; 324e3b43481SJianfeng Tan } 325e3b43481SJianfeng Tan 326e3b43481SJianfeng Tan dev->vhostfds[i] = vhostfd; 327e3b43481SJianfeng Tan } 328e3b43481SJianfeng Tan 329e3b43481SJianfeng Tan return 0; 330e3b43481SJianfeng Tan } 331e3b43481SJianfeng Tan 332e3b43481SJianfeng Tan static int 333e3b43481SJianfeng Tan vhost_kernel_set_backend(int vhostfd, int tapfd) 334e3b43481SJianfeng Tan { 335e3b43481SJianfeng Tan struct vhost_vring_file f; 336e3b43481SJianfeng Tan 337e3b43481SJianfeng Tan f.fd = tapfd; 338e3b43481SJianfeng Tan f.index = 0; 339e3b43481SJianfeng Tan if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 340e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 341e3b43481SJianfeng Tan strerror(errno)); 342e3b43481SJianfeng Tan return -1; 343e3b43481SJianfeng Tan } 344e3b43481SJianfeng Tan 345e3b43481SJianfeng Tan f.index = 1; 346e3b43481SJianfeng Tan if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 347e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 348e3b43481SJianfeng Tan strerror(errno)); 349e3b43481SJianfeng Tan return -1; 350e3b43481SJianfeng Tan } 351e3b43481SJianfeng Tan 352e3b43481SJianfeng Tan return 0; 353e3b43481SJianfeng Tan } 354e3b43481SJianfeng Tan 355e3b43481SJianfeng Tan static int 356e3b43481SJianfeng Tan vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 357e3b43481SJianfeng Tan uint16_t pair_idx, 358e3b43481SJianfeng Tan int enable) 359e3b43481SJianfeng Tan { 360e3b43481SJianfeng Tan int hdr_size; 361e3b43481SJianfeng Tan int vhostfd; 362e3b43481SJianfeng Tan int tapfd; 363be7a4707SJianfeng Tan int req_mq = (dev->max_queue_pairs > 1); 364e3b43481SJianfeng Tan 365e3b43481SJianfeng Tan vhostfd = dev->vhostfds[pair_idx]; 366e3b43481SJianfeng Tan 367e3b43481SJianfeng Tan if (!enable) { 368e3b43481SJianfeng Tan if (dev->tapfds[pair_idx]) { 369e3b43481SJianfeng Tan close(dev->tapfds[pair_idx]); 370e3b43481SJianfeng Tan dev->tapfds[pair_idx] = -1; 371e3b43481SJianfeng Tan } 372e3b43481SJianfeng Tan return vhost_kernel_set_backend(vhostfd, -1); 373e3b43481SJianfeng Tan } else if (dev->tapfds[pair_idx] >= 0) { 374e3b43481SJianfeng Tan return 0; 375e3b43481SJianfeng Tan } 376e3b43481SJianfeng Tan 377e3b43481SJianfeng Tan if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 378e3b43481SJianfeng Tan (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 379e3b43481SJianfeng Tan hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 380e3b43481SJianfeng Tan else 381e3b43481SJianfeng Tan hdr_size = sizeof(struct virtio_net_hdr); 382e3b43481SJianfeng Tan 383be7a4707SJianfeng Tan tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); 384e3b43481SJianfeng Tan if (tapfd < 0) { 385e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 386e3b43481SJianfeng Tan return -1; 387e3b43481SJianfeng Tan } 388e3b43481SJianfeng Tan 389e3b43481SJianfeng Tan if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 390e3b43481SJianfeng Tan PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 391e3b43481SJianfeng Tan close(tapfd); 392e3b43481SJianfeng Tan return -1; 393e3b43481SJianfeng Tan } 394e3b43481SJianfeng Tan 395e3b43481SJianfeng Tan dev->tapfds[pair_idx] = tapfd; 396e3b43481SJianfeng Tan return 0; 397e3b43481SJianfeng Tan } 398e3b43481SJianfeng Tan 399e3b43481SJianfeng Tan struct virtio_user_backend_ops ops_kernel = { 400e3b43481SJianfeng Tan .setup = vhost_kernel_setup, 401e3b43481SJianfeng Tan .send_request = vhost_kernel_ioctl, 402e3b43481SJianfeng Tan .enable_qp = vhost_kernel_enable_queue_pair 403e3b43481SJianfeng Tan }; 404