1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation 3 */ 4 5 #include <sys/types.h> 6 #include <sys/stat.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 10 #include <rte_memory.h> 11 #include <rte_eal_memconfig.h> 12 13 #include "vhost.h" 14 #include "virtio_user_dev.h" 15 #include "vhost_kernel_tap.h" 16 17 struct vhost_memory_kernel { 18 uint32_t nregions; 19 uint32_t padding; 20 struct vhost_memory_region regions[0]; 21 }; 22 23 /* vhost kernel ioctls */ 24 #define VHOST_VIRTIO 0xAF 25 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 26 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 27 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 28 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 29 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 30 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 31 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 32 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 33 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 34 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 35 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 36 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 37 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 38 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 39 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 40 41 static uint64_t max_regions = 64; 42 43 static void 44 get_vhost_kernel_max_regions(void) 45 { 46 int fd; 47 char buf[20] = {'\0'}; 48 49 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 50 if (fd < 0) 51 return; 52 53 if (read(fd, buf, sizeof(buf) - 1) > 0) 54 max_regions = strtoull(buf, NULL, 10); 55 56 close(fd); 57 } 58 59 static uint64_t vhost_req_user_to_kernel[] = { 60 [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 61 [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 62 [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 63 [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 64 [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 65 [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 66 [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 67 [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 68 [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 69 [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 70 [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 71 }; 72 73 static int 74 add_memseg_list(const struct rte_memseg_list *msl, void *arg) 75 { 76 struct vhost_memory_kernel *vm = arg; 77 struct vhost_memory_region *mr; 78 void *start_addr; 79 uint64_t len; 80 81 if (vm->nregions >= max_regions) 82 return -1; 83 84 start_addr = msl->base_va; 85 len = msl->page_sz * msl->memseg_arr.len; 86 87 mr = &vm->regions[vm->nregions++]; 88 89 mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr; 90 mr->userspace_addr = (uint64_t)(uintptr_t)start_addr; 91 mr->memory_size = len; 92 mr->mmap_offset = 0; /* flags_padding */ 93 94 PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64, 95 vm->nregions - 1, start_addr, len); 96 97 return 0; 98 } 99 100 /* By default, vhost kernel module allows 64 regions, but DPDK may 101 * have much more memory regions. Below function will treat each 102 * contiguous memory space reserved by DPDK as one region. 103 */ 104 static struct vhost_memory_kernel * 105 prepare_vhost_memory_kernel(void) 106 { 107 struct vhost_memory_kernel *vm; 108 109 vm = malloc(sizeof(struct vhost_memory_kernel) + 110 max_regions * 111 sizeof(struct vhost_memory_region)); 112 if (!vm) 113 return NULL; 114 115 vm->nregions = 0; 116 vm->padding = 0; 117 118 /* 119 * The memory lock has already been taken by memory subsystem 120 * or virtio_user_start_device(). 121 */ 122 if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) { 123 free(vm); 124 return NULL; 125 } 126 127 return vm; 128 } 129 130 /* with below features, vhost kernel does not need to do the checksum and TSO, 131 * these info will be passed to virtio_user through virtio net header. 132 */ 133 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 134 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 135 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 136 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 137 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 138 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 139 140 /* with below features, when flows from virtio_user to vhost kernel 141 * (1) if flows goes up through the kernel networking stack, it does not need 142 * to verify checksum, which can save CPU cycles; 143 * (2) if flows goes through a Linux bridge and outside from an interface 144 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 145 * offloaded into real physical device. 146 */ 147 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 148 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 149 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 150 (1ULL << VIRTIO_NET_F_CSUM)) 151 152 static unsigned int 153 tap_support_features(void) 154 { 155 int tapfd; 156 unsigned int tap_features; 157 158 tapfd = open(PATH_NET_TUN, O_RDWR); 159 if (tapfd < 0) { 160 PMD_DRV_LOG(ERR, "fail to open %s: %s", 161 PATH_NET_TUN, strerror(errno)); 162 return -1; 163 } 164 165 if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 166 PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 167 close(tapfd); 168 return -1; 169 } 170 171 close(tapfd); 172 return tap_features; 173 } 174 175 static int 176 vhost_kernel_ioctl(struct virtio_user_dev *dev, 177 enum vhost_user_request req, 178 void *arg) 179 { 180 int ret = -1; 181 unsigned int i; 182 uint64_t req_kernel; 183 struct vhost_memory_kernel *vm = NULL; 184 int vhostfd; 185 unsigned int queue_sel; 186 unsigned int features; 187 188 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 189 190 req_kernel = vhost_req_user_to_kernel[req]; 191 192 if (req_kernel == VHOST_SET_MEM_TABLE) { 193 vm = prepare_vhost_memory_kernel(); 194 if (!vm) 195 return -1; 196 arg = (void *)vm; 197 } 198 199 if (req_kernel == VHOST_SET_FEATURES) { 200 /* We don't need memory protection here */ 201 *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 202 203 /* VHOST kernel does not know about below flags */ 204 *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 205 *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 206 207 *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 208 } 209 210 switch (req_kernel) { 211 case VHOST_SET_VRING_NUM: 212 case VHOST_SET_VRING_ADDR: 213 case VHOST_SET_VRING_BASE: 214 case VHOST_GET_VRING_BASE: 215 case VHOST_SET_VRING_KICK: 216 case VHOST_SET_VRING_CALL: 217 queue_sel = *(unsigned int *)arg; 218 vhostfd = dev->vhostfds[queue_sel / 2]; 219 *(unsigned int *)arg = queue_sel % 2; 220 PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 221 vhostfd, *(unsigned int *)arg); 222 break; 223 default: 224 vhostfd = -1; 225 } 226 if (vhostfd == -1) { 227 for (i = 0; i < dev->max_queue_pairs; ++i) { 228 if (dev->vhostfds[i] < 0) 229 continue; 230 231 ret = ioctl(dev->vhostfds[i], req_kernel, arg); 232 if (ret < 0) 233 break; 234 } 235 } else { 236 ret = ioctl(vhostfd, req_kernel, arg); 237 } 238 239 if (!ret && req_kernel == VHOST_GET_FEATURES) { 240 features = tap_support_features(); 241 /* with tap as the backend, all these features are supported 242 * but not claimed by vhost-net, so we add them back when 243 * reporting to upper layer. 244 */ 245 if (features & IFF_VNET_HDR) { 246 *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 247 *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 248 } 249 250 /* vhost_kernel will not declare this feature, but it does 251 * support multi-queue. 252 */ 253 if (features & IFF_MULTI_QUEUE) 254 *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 255 } 256 257 if (vm) 258 free(vm); 259 260 if (ret < 0) 261 PMD_DRV_LOG(ERR, "%s failed: %s", 262 vhost_msg_strings[req], strerror(errno)); 263 264 return ret; 265 } 266 267 /** 268 * Set up environment to talk with a vhost kernel backend. 269 * 270 * @return 271 * - (-1) if fail to set up; 272 * - (>=0) if successful. 273 */ 274 static int 275 vhost_kernel_setup(struct virtio_user_dev *dev) 276 { 277 int vhostfd; 278 uint32_t i; 279 280 get_vhost_kernel_max_regions(); 281 282 for (i = 0; i < dev->max_queue_pairs; ++i) { 283 vhostfd = open(dev->path, O_RDWR); 284 if (vhostfd < 0) { 285 PMD_DRV_LOG(ERR, "fail to open %s, %s", 286 dev->path, strerror(errno)); 287 return -1; 288 } 289 290 dev->vhostfds[i] = vhostfd; 291 } 292 293 return 0; 294 } 295 296 static int 297 vhost_kernel_set_backend(int vhostfd, int tapfd) 298 { 299 struct vhost_vring_file f; 300 301 f.fd = tapfd; 302 f.index = 0; 303 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 304 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 305 strerror(errno)); 306 return -1; 307 } 308 309 f.index = 1; 310 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 311 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 312 strerror(errno)); 313 return -1; 314 } 315 316 return 0; 317 } 318 319 static int 320 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 321 uint16_t pair_idx, 322 int enable) 323 { 324 int hdr_size; 325 int vhostfd; 326 int tapfd; 327 int req_mq = (dev->max_queue_pairs > 1); 328 329 vhostfd = dev->vhostfds[pair_idx]; 330 331 if (!enable) { 332 if (dev->tapfds[pair_idx] >= 0) { 333 close(dev->tapfds[pair_idx]); 334 dev->tapfds[pair_idx] = -1; 335 } 336 return vhost_kernel_set_backend(vhostfd, -1); 337 } else if (dev->tapfds[pair_idx] >= 0) { 338 return 0; 339 } 340 341 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 342 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 343 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 344 else 345 hdr_size = sizeof(struct virtio_net_hdr); 346 347 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, 348 (char *)dev->mac_addr, dev->features); 349 if (tapfd < 0) { 350 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 351 return -1; 352 } 353 354 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 355 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 356 close(tapfd); 357 return -1; 358 } 359 360 dev->tapfds[pair_idx] = tapfd; 361 return 0; 362 } 363 364 struct virtio_user_backend_ops ops_kernel = { 365 .setup = vhost_kernel_setup, 366 .send_request = vhost_kernel_ioctl, 367 .enable_qp = vhost_kernel_enable_queue_pair 368 }; 369