1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation 3 */ 4 5 #include <sys/types.h> 6 #include <sys/stat.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 10 #include <rte_memory.h> 11 #include <rte_eal_memconfig.h> 12 13 #include "vhost.h" 14 #include "virtio_user_dev.h" 15 #include "vhost_kernel_tap.h" 16 17 struct vhost_memory_kernel { 18 uint32_t nregions; 19 uint32_t padding; 20 struct vhost_memory_region regions[0]; 21 }; 22 23 /* vhost kernel ioctls */ 24 #define VHOST_VIRTIO 0xAF 25 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 26 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 27 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 28 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 29 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 30 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 31 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 32 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 33 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 34 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 35 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 36 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 37 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 38 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 39 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 40 41 static uint64_t max_regions = 64; 42 43 static void 44 get_vhost_kernel_max_regions(void) 45 { 46 int fd; 47 char buf[20] = {'\0'}; 48 49 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 50 if (fd < 0) 51 return; 52 53 if (read(fd, buf, sizeof(buf) - 1) > 0) 54 max_regions = strtoull(buf, NULL, 10); 55 56 close(fd); 57 } 58 59 static uint64_t vhost_req_user_to_kernel[] = { 60 [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 61 [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 62 [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 63 [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 64 [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 65 [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 66 [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 67 [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 68 [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 69 [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 70 [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 71 }; 72 73 static int 74 add_memseg_list(const struct rte_memseg_list *msl, void *arg) 75 { 76 struct vhost_memory_kernel *vm = arg; 77 struct vhost_memory_region *mr; 78 void *start_addr; 79 uint64_t len; 80 81 if (msl->external) 82 return 0; 83 84 if (vm->nregions >= max_regions) 85 return -1; 86 87 start_addr = msl->base_va; 88 len = msl->page_sz * msl->memseg_arr.len; 89 90 mr = &vm->regions[vm->nregions++]; 91 92 mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr; 93 mr->userspace_addr = (uint64_t)(uintptr_t)start_addr; 94 mr->memory_size = len; 95 mr->mmap_offset = 0; /* flags_padding */ 96 97 PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64, 98 vm->nregions - 1, start_addr, len); 99 100 return 0; 101 } 102 103 /* By default, vhost kernel module allows 64 regions, but DPDK may 104 * have much more memory regions. Below function will treat each 105 * contiguous memory space reserved by DPDK as one region. 106 */ 107 static struct vhost_memory_kernel * 108 prepare_vhost_memory_kernel(void) 109 { 110 struct vhost_memory_kernel *vm; 111 112 vm = malloc(sizeof(struct vhost_memory_kernel) + 113 max_regions * 114 sizeof(struct vhost_memory_region)); 115 if (!vm) 116 return NULL; 117 118 vm->nregions = 0; 119 vm->padding = 0; 120 121 /* 122 * The memory lock has already been taken by memory subsystem 123 * or virtio_user_start_device(). 124 */ 125 if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) { 126 free(vm); 127 return NULL; 128 } 129 130 return vm; 131 } 132 133 /* with below features, vhost kernel does not need to do the checksum and TSO, 134 * these info will be passed to virtio_user through virtio net header. 135 */ 136 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 137 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 138 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 139 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 140 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 141 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 142 143 /* with below features, when flows from virtio_user to vhost kernel 144 * (1) if flows goes up through the kernel networking stack, it does not need 145 * to verify checksum, which can save CPU cycles; 146 * (2) if flows goes through a Linux bridge and outside from an interface 147 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 148 * offloaded into real physical device. 149 */ 150 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 151 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 152 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 153 (1ULL << VIRTIO_NET_F_CSUM)) 154 155 static unsigned int 156 tap_support_features(void) 157 { 158 int tapfd; 159 unsigned int tap_features; 160 161 tapfd = open(PATH_NET_TUN, O_RDWR); 162 if (tapfd < 0) { 163 PMD_DRV_LOG(ERR, "fail to open %s: %s", 164 PATH_NET_TUN, strerror(errno)); 165 return -1; 166 } 167 168 if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 169 PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 170 close(tapfd); 171 return -1; 172 } 173 174 close(tapfd); 175 return tap_features; 176 } 177 178 static int 179 vhost_kernel_ioctl(struct virtio_user_dev *dev, 180 enum vhost_user_request req, 181 void *arg) 182 { 183 int ret = -1; 184 unsigned int i; 185 uint64_t req_kernel; 186 struct vhost_memory_kernel *vm = NULL; 187 int vhostfd; 188 unsigned int queue_sel; 189 unsigned int features; 190 191 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 192 193 req_kernel = vhost_req_user_to_kernel[req]; 194 195 if (req_kernel == VHOST_SET_MEM_TABLE) { 196 vm = prepare_vhost_memory_kernel(); 197 if (!vm) 198 return -1; 199 arg = (void *)vm; 200 } 201 202 if (req_kernel == VHOST_SET_FEATURES) { 203 /* We don't need memory protection here */ 204 *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 205 206 /* VHOST kernel does not know about below flags */ 207 *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 208 *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 209 210 *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 211 } 212 213 switch (req_kernel) { 214 case VHOST_SET_VRING_NUM: 215 case VHOST_SET_VRING_ADDR: 216 case VHOST_SET_VRING_BASE: 217 case VHOST_GET_VRING_BASE: 218 case VHOST_SET_VRING_KICK: 219 case VHOST_SET_VRING_CALL: 220 queue_sel = *(unsigned int *)arg; 221 vhostfd = dev->vhostfds[queue_sel / 2]; 222 *(unsigned int *)arg = queue_sel % 2; 223 PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 224 vhostfd, *(unsigned int *)arg); 225 break; 226 default: 227 vhostfd = -1; 228 } 229 if (vhostfd == -1) { 230 for (i = 0; i < dev->max_queue_pairs; ++i) { 231 if (dev->vhostfds[i] < 0) 232 continue; 233 234 ret = ioctl(dev->vhostfds[i], req_kernel, arg); 235 if (ret < 0) 236 break; 237 } 238 } else { 239 ret = ioctl(vhostfd, req_kernel, arg); 240 } 241 242 if (!ret && req_kernel == VHOST_GET_FEATURES) { 243 features = tap_support_features(); 244 /* with tap as the backend, all these features are supported 245 * but not claimed by vhost-net, so we add them back when 246 * reporting to upper layer. 247 */ 248 if (features & IFF_VNET_HDR) { 249 *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 250 *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 251 } 252 253 /* vhost_kernel will not declare this feature, but it does 254 * support multi-queue. 255 */ 256 if (features & IFF_MULTI_QUEUE) 257 *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 258 } 259 260 if (vm) 261 free(vm); 262 263 if (ret < 0) 264 PMD_DRV_LOG(ERR, "%s failed: %s", 265 vhost_msg_strings[req], strerror(errno)); 266 267 return ret; 268 } 269 270 /** 271 * Set up environment to talk with a vhost kernel backend. 272 * 273 * @return 274 * - (-1) if fail to set up; 275 * - (>=0) if successful. 276 */ 277 static int 278 vhost_kernel_setup(struct virtio_user_dev *dev) 279 { 280 int vhostfd; 281 uint32_t i; 282 283 get_vhost_kernel_max_regions(); 284 285 for (i = 0; i < dev->max_queue_pairs; ++i) { 286 vhostfd = open(dev->path, O_RDWR); 287 if (vhostfd < 0) { 288 PMD_DRV_LOG(ERR, "fail to open %s, %s", 289 dev->path, strerror(errno)); 290 return -1; 291 } 292 293 dev->vhostfds[i] = vhostfd; 294 } 295 296 return 0; 297 } 298 299 static int 300 vhost_kernel_set_backend(int vhostfd, int tapfd) 301 { 302 struct vhost_vring_file f; 303 304 f.fd = tapfd; 305 f.index = 0; 306 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 307 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 308 strerror(errno)); 309 return -1; 310 } 311 312 f.index = 1; 313 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 314 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 315 strerror(errno)); 316 return -1; 317 } 318 319 return 0; 320 } 321 322 static int 323 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 324 uint16_t pair_idx, 325 int enable) 326 { 327 int hdr_size; 328 int vhostfd; 329 int tapfd; 330 int req_mq = (dev->max_queue_pairs > 1); 331 332 vhostfd = dev->vhostfds[pair_idx]; 333 334 if (!enable) { 335 if (dev->tapfds[pair_idx] >= 0) { 336 close(dev->tapfds[pair_idx]); 337 dev->tapfds[pair_idx] = -1; 338 } 339 return vhost_kernel_set_backend(vhostfd, -1); 340 } else if (dev->tapfds[pair_idx] >= 0) { 341 return 0; 342 } 343 344 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 345 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 346 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 347 else 348 hdr_size = sizeof(struct virtio_net_hdr); 349 350 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, 351 (char *)dev->mac_addr, dev->features); 352 if (tapfd < 0) { 353 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 354 return -1; 355 } 356 357 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 358 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 359 close(tapfd); 360 return -1; 361 } 362 363 dev->tapfds[pair_idx] = tapfd; 364 return 0; 365 } 366 367 struct virtio_user_backend_ops ops_kernel = { 368 .setup = vhost_kernel_setup, 369 .send_request = vhost_kernel_ioctl, 370 .enable_qp = vhost_kernel_enable_queue_pair 371 }; 372