/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Intel Corporation
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_memory.h>
#include <rte_eal_memconfig.h>

#include "vhost.h"
#include "virtio_user_dev.h"
#include "vhost_kernel_tap.h"

struct vhost_memory_kernel {
	uint32_t nregions;
	uint32_t padding;
	struct vhost_memory_region regions[0];
};

/* vhost kernel ioctls */
#define VHOST_VIRTIO 0xAF
#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)

static uint64_t max_regions = 64;

static void
get_vhost_kernel_max_regions(void)
{
	int fd;
	char buf[20] = {'\0'};

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	if (read(fd, buf, sizeof(buf) - 1) > 0)
		max_regions = strtoull(buf, NULL, 10);

	close(fd);
}

/* Translation table from vhost-user requests to vhost kernel ioctls */
static uint64_t vhost_req_user_to_kernel[] = {
	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
};
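/* Illustrative only: how the table above is meant to be used.  A caller
 * indexes it with a vhost-user request to obtain the matching kernel ioctl
 * number, then issues that ioctl on an opened vhost fd; vhost_kernel_ioctl()
 * below does this for real requests.  The fd and variable names here are
 * hypothetical:
 *
 *	uint64_t features;
 *	uint64_t req_kernel = vhost_req_user_to_kernel[VHOST_USER_GET_FEATURES];
 *
 *	if (ioctl(vhostfd, req_kernel, &features) == 0)
 *		PMD_DRV_LOG(DEBUG, "features: 0x%" PRIx64, features);
 */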
/* By default, the vhost kernel module allows only 64 memory regions, while
 * DPDK can have up to 256 memory segments. To work around this, the function
 * below merges virtually adjacent memsegs into a single region.
 */
static struct vhost_memory_kernel *
prepare_vhost_memory_kernel(void)
{
	uint32_t i, j, k = 0;
	struct rte_memseg *seg;
	struct vhost_memory_region *mr;
	struct vhost_memory_kernel *vm;

	vm = malloc(sizeof(struct vhost_memory_kernel) +
		    max_regions *
		    sizeof(struct vhost_memory_region));
	if (!vm)
		return NULL;

	for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
		seg = &rte_eal_get_configuration()->mem_config->memseg[i];
		if (!seg->addr)
			break;

		int new_region = 1;

		for (j = 0; j < k; ++j) {
			mr = &vm->regions[j];

			/* seg sits immediately above an existing region:
			 * extend that region upward.
			 */
			if (mr->userspace_addr + mr->memory_size ==
			    (uint64_t)(uintptr_t)seg->addr) {
				mr->memory_size += seg->len;
				new_region = 0;
				break;
			}

			/* seg sits immediately below an existing region:
			 * extend that region downward.
			 */
			if ((uint64_t)(uintptr_t)seg->addr + seg->len ==
			    mr->userspace_addr) {
				mr->guest_phys_addr =
					(uint64_t)(uintptr_t)seg->addr;
				mr->userspace_addr =
					(uint64_t)(uintptr_t)seg->addr;
				mr->memory_size += seg->len;
				new_region = 0;
				break;
			}
		}

		if (new_region == 0)
			continue;

		/* Check capacity before writing, so a configuration that
		 * needs exactly max_regions regions is not rejected.
		 */
		if (k >= max_regions) {
			free(vm);
			return NULL;
		}

		mr = &vm->regions[k++];
		/* use vaddr here! */
		mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr;
		mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr;
		mr->memory_size = seg->len;
		mr->mmap_offset = 0;
	}

	vm->nregions = k;
	vm->padding = 0;
	return vm;
}
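/* Worked example for the merge above (hypothetical addresses): two 2 MB
 * hugepage memsegs mapped back to back at 0x7f0000000000 and 0x7f0000200000
 * collapse into a single region:
 *
 *	guest_phys_addr = userspace_addr = 0x7f0000000000
 *	memory_size     = 0x400000 (4 MB)
 *
 * so up to 256 memsegs typically fit within the 64 regions that vhost
 * accepts by default.
 */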
/* With the features below, the vhost kernel backend does not need to
 * calculate checksums or do TSO itself; the offload state is passed to
 * virtio_user through the virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))

/* With the features below, for flows from virtio_user to the vhost kernel
 * backend:
 * (1) if a flow goes up through the kernel networking stack, the kernel does
 *     not need to verify the checksum, which saves CPU cycles;
 * (2) if a flow goes through a Linux bridge and out via a physical interface
 *     (kernel driver), checksumming and TSO are done by GSO in the kernel,
 *     or even offloaded to the physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_CSUM))

static int
tap_support_mq(void)
{
	int tapfd;
	unsigned int tap_features;

	tapfd = open(PATH_NET_TUN, O_RDWR);
	if (tapfd < 0) {
		PMD_DRV_LOG(ERR, "failed to open %s: %s",
			    PATH_NET_TUN, strerror(errno));
		return -1;
	}

	if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
		PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
		close(tapfd);
		return -1;
	}

	close(tapfd);
	return tap_features & IFF_MULTI_QUEUE;
}

static int
vhost_kernel_ioctl(struct virtio_user_dev *dev,
		   enum vhost_user_request req,
		   void *arg)
{
	int ret = -1;
	unsigned int i;
	uint64_t req_kernel;
	struct vhost_memory_kernel *vm = NULL;
	int vhostfd;
	unsigned int queue_sel;

	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);

	req_kernel = vhost_req_user_to_kernel[req];

	if (req_kernel == VHOST_SET_MEM_TABLE) {
		vm = prepare_vhost_memory_kernel();
		if (!vm)
			return -1;
		arg = (void *)vm;
	}

	if (req_kernel == VHOST_SET_FEATURES) {
		/* We don't need memory protection here */
		*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);

		/* The vhost kernel backend does not know about the flags
		 * below; they are handled by the tap device instead.
		 */
		*(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
		*(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;

		*(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
	}

	switch (req_kernel) {
	case VHOST_SET_VRING_NUM:
	case VHOST_SET_VRING_ADDR:
	case VHOST_SET_VRING_BASE:
	case VHOST_GET_VRING_BASE:
	case VHOST_SET_VRING_KICK:
	case VHOST_SET_VRING_CALL:
		/* Each queue pair has its own vhost fd; translate the
		 * device-level ring index into a per-fd ring index.
		 */
		queue_sel = *(unsigned int *)arg;
		vhostfd = dev->vhostfds[queue_sel / 2];
		*(unsigned int *)arg = queue_sel % 2;
		PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
			    vhostfd, *(unsigned int *)arg);
		break;
	default:
		vhostfd = -1;
	}
	if (vhostfd == -1) {
		/* Not vring-specific: apply the request to every vhost fd */
		for (i = 0; i < dev->max_queue_pairs; ++i) {
			if (dev->vhostfds[i] < 0)
				continue;

			ret = ioctl(dev->vhostfds[i], req_kernel, arg);
			if (ret < 0)
				break;
		}
	} else {
		ret = ioctl(vhostfd, req_kernel, arg);
	}

	if (!ret && req_kernel == VHOST_GET_FEATURES) {
		/* With tap as the backend, all these features are supported
		 * but not claimed by vhost-net, so we add them back when
		 * reporting to the upper layer.
		 */
		*((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
		*((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;

		/* vhost_kernel does not declare this feature, but it does
		 * support multi-queue.
		 */
		if (tap_support_mq() > 0)
			*(uint64_t *)arg |= (1ULL << VIRTIO_NET_F_MQ);
	}

	if (vm)
		free(vm);

	if (ret < 0)
		PMD_DRV_LOG(ERR, "%s failed: %s",
			    vhost_msg_strings[req], strerror(errno));

	return ret;
}
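/* Ring-index mapping used in vhost_kernel_ioctl() above, spelled out with a
 * hypothetical value: every queue pair owns its own vhost fd carrying two
 * rings, 0 (rx) and 1 (tx), so a device-level ring index splits as
 *
 *	queue_sel = 5  ->  fd = dev->vhostfds[5 / 2], ring index = 5 % 2 = 1
 *
 * which is why *arg is rewritten before the ioctl is issued.
 */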
/**
 * Set up the environment to talk to a vhost kernel backend.
 *
 * @return
 *   - (-1) if setup fails;
 *   - (>=0) if successful.
 */
static int
vhost_kernel_setup(struct virtio_user_dev *dev)
{
	int vhostfd;
	uint32_t i;

	get_vhost_kernel_max_regions();

	for (i = 0; i < dev->max_queue_pairs; ++i) {
		vhostfd = open(dev->path, O_RDWR);
		if (vhostfd < 0) {
			PMD_DRV_LOG(ERR, "failed to open %s: %s",
				    dev->path, strerror(errno));
			return -1;
		}

		dev->vhostfds[i] = vhostfd;
	}

	return 0;
}

static int
vhost_kernel_set_backend(int vhostfd, int tapfd)
{
	struct vhost_vring_file f;

	f.fd = tapfd;
	f.index = 0;	/* rx ring */
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
			    strerror(errno));
		return -1;
	}

	f.index = 1;	/* tx ring */
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
			    strerror(errno));
		return -1;
	}

	return 0;
}

static int
vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
			       uint16_t pair_idx,
			       int enable)
{
	int hdr_size;
	int vhostfd;
	int tapfd;
	int req_mq = (dev->max_queue_pairs > 1);

	vhostfd = dev->vhostfds[pair_idx];

	if (!enable) {
		if (dev->tapfds[pair_idx] >= 0) {
			close(dev->tapfds[pair_idx]);
			dev->tapfds[pair_idx] = -1;
		}
		return vhost_kernel_set_backend(vhostfd, -1);
	} else if (dev->tapfds[pair_idx] >= 0) {
		return 0;
	}

	/* Mergeable rx buffers and virtio 1.0 both use the larger net
	 * header; otherwise the legacy header size applies.
	 */
	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hdr_size = sizeof(struct virtio_net_hdr);

	tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq);
	if (tapfd < 0) {
		PMD_DRV_LOG(ERR, "failed to open tap for vhost kernel");
		return -1;
	}

	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
		PMD_DRV_LOG(ERR, "failed to set backend for vhost kernel");
		close(tapfd);
		return -1;
	}

	dev->tapfds[pair_idx] = tapfd;
	return 0;
}

struct virtio_user_backend_ops ops_kernel = {
	.setup = vhost_kernel_setup,
	.send_request = vhost_kernel_ioctl,
	.enable_qp = vhost_kernel_enable_queue_pair
};
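/* A sketch of how a caller such as virtio_user_dev.c might drive this ops
 * table; illustrative only, everything except ops_kernel and the ops fields
 * is an assumption:
 *
 *	dev->ops = &ops_kernel;
 *	if (dev->ops->setup(dev) < 0)
 *		return -1;
 *	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0)
 *		return -1;
 *	dev->ops->enable_qp(dev, 0, 1);
 */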