1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2016 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <fcntl.h> 37 #include <unistd.h> 38 39 #include <rte_memory.h> 40 #include <rte_eal_memconfig.h> 41 42 #include "vhost.h" 43 #include "virtio_user_dev.h" 44 #include "vhost_kernel_tap.h" 45 46 struct vhost_memory_kernel { 47 uint32_t nregions; 48 uint32_t padding; 49 struct vhost_memory_region regions[0]; 50 }; 51 52 /* vhost kernel ioctls */ 53 #define VHOST_VIRTIO 0xAF 54 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 55 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 56 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 57 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 58 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 59 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 60 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 61 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 62 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 63 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 64 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 65 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 66 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 67 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 68 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 69 70 static uint64_t max_regions = 64; 71 72 static void 73 get_vhost_kernel_max_regions(void) 74 { 75 int fd; 76 char buf[20] = {'\0'}; 77 78 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 79 if (fd < 0) 80 return; 81 82 if (read(fd, buf, sizeof(buf) - 1) > 0) 83 max_regions = strtoull(buf, NULL, 10); 84 85 close(fd); 86 } 87 88 static uint64_t vhost_req_user_to_kernel[] = { 89 [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 90 [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 91 [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 92 [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 93 [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 94 [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 95 [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 96 [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 97 [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 98 [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 99 [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 100 }; 101 102 /* By default, vhost kernel module allows 64 regions, but DPDK allows 103 * 256 segments. As a relief, below function merges those virtually 104 * adjacent memsegs into one region. 105 */ 106 static struct vhost_memory_kernel * 107 prepare_vhost_memory_kernel(void) 108 { 109 uint32_t i, j, k = 0; 110 struct rte_memseg *seg; 111 struct vhost_memory_region *mr; 112 struct vhost_memory_kernel *vm; 113 114 vm = malloc(sizeof(struct vhost_memory_kernel) + 115 max_regions * 116 sizeof(struct vhost_memory_region)); 117 if (!vm) 118 return NULL; 119 120 for (i = 0; i < RTE_MAX_MEMSEG; ++i) { 121 seg = &rte_eal_get_configuration()->mem_config->memseg[i]; 122 if (!seg->addr) 123 break; 124 125 int new_region = 1; 126 127 for (j = 0; j < k; ++j) { 128 mr = &vm->regions[j]; 129 130 if (mr->userspace_addr + mr->memory_size == 131 (uint64_t)(uintptr_t)seg->addr) { 132 mr->memory_size += seg->len; 133 new_region = 0; 134 break; 135 } 136 137 if ((uint64_t)(uintptr_t)seg->addr + seg->len == 138 mr->userspace_addr) { 139 mr->guest_phys_addr = 140 (uint64_t)(uintptr_t)seg->addr; 141 mr->userspace_addr = 142 (uint64_t)(uintptr_t)seg->addr; 143 mr->memory_size += seg->len; 144 new_region = 0; 145 break; 146 } 147 } 148 149 if (new_region == 0) 150 continue; 151 152 mr = &vm->regions[k++]; 153 /* use vaddr here! */ 154 mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; 155 mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; 156 mr->memory_size = seg->len; 157 mr->mmap_offset = 0; 158 159 if (k >= max_regions) { 160 free(vm); 161 return NULL; 162 } 163 } 164 165 vm->nregions = k; 166 vm->padding = 0; 167 return vm; 168 } 169 170 /* with below features, vhost kernel does not need to do the checksum and TSO, 171 * these info will be passed to virtio_user through virtio net header. 172 */ 173 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 174 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 175 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 176 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 177 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 178 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 179 180 /* with below features, when flows from virtio_user to vhost kernel 181 * (1) if flows goes up through the kernel networking stack, it does not need 182 * to verify checksum, which can save CPU cycles; 183 * (2) if flows goes through a Linux bridge and outside from an interface 184 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 185 * offloaded into real physical device. 186 */ 187 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 188 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 189 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 190 (1ULL << VIRTIO_NET_F_CSUM)) 191 192 static int 193 tap_supporte_mq(void) 194 { 195 int tapfd; 196 unsigned int tap_features; 197 198 tapfd = open(PATH_NET_TUN, O_RDWR); 199 if (tapfd < 0) { 200 PMD_DRV_LOG(ERR, "fail to open %s: %s", 201 PATH_NET_TUN, strerror(errno)); 202 return -1; 203 } 204 205 if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 206 PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 207 close(tapfd); 208 return -1; 209 } 210 211 close(tapfd); 212 return tap_features & IFF_MULTI_QUEUE; 213 } 214 215 static int 216 vhost_kernel_ioctl(struct virtio_user_dev *dev, 217 enum vhost_user_request req, 218 void *arg) 219 { 220 int ret = -1; 221 unsigned int i; 222 uint64_t req_kernel; 223 struct vhost_memory_kernel *vm = NULL; 224 int vhostfd; 225 unsigned int queue_sel; 226 227 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 228 229 req_kernel = vhost_req_user_to_kernel[req]; 230 231 if (req_kernel == VHOST_SET_MEM_TABLE) { 232 vm = prepare_vhost_memory_kernel(); 233 if (!vm) 234 return -1; 235 arg = (void *)vm; 236 } 237 238 if (req_kernel == VHOST_SET_FEATURES) { 239 /* We don't need memory protection here */ 240 *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 241 242 /* VHOST kernel does not know about below flags */ 243 *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 244 *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 245 246 *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 247 } 248 249 switch (req_kernel) { 250 case VHOST_SET_VRING_NUM: 251 case VHOST_SET_VRING_ADDR: 252 case VHOST_SET_VRING_BASE: 253 case VHOST_GET_VRING_BASE: 254 case VHOST_SET_VRING_KICK: 255 case VHOST_SET_VRING_CALL: 256 queue_sel = *(unsigned int *)arg; 257 vhostfd = dev->vhostfds[queue_sel / 2]; 258 *(unsigned int *)arg = queue_sel % 2; 259 PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 260 vhostfd, *(unsigned int *)arg); 261 break; 262 default: 263 vhostfd = -1; 264 } 265 if (vhostfd == -1) { 266 for (i = 0; i < dev->max_queue_pairs; ++i) { 267 if (dev->vhostfds[i] < 0) 268 continue; 269 270 ret = ioctl(dev->vhostfds[i], req_kernel, arg); 271 if (ret < 0) 272 break; 273 } 274 } else { 275 ret = ioctl(vhostfd, req_kernel, arg); 276 } 277 278 if (!ret && req_kernel == VHOST_GET_FEATURES) { 279 /* with tap as the backend, all these features are supported 280 * but not claimed by vhost-net, so we add them back when 281 * reporting to upper layer. 282 */ 283 *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 284 *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 285 286 /* vhost_kernel will not declare this feature, but it does 287 * support multi-queue. 288 */ 289 if (tap_supporte_mq()) 290 *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 291 } 292 293 if (vm) 294 free(vm); 295 296 if (ret < 0) 297 PMD_DRV_LOG(ERR, "%s failed: %s", 298 vhost_msg_strings[req], strerror(errno)); 299 300 return ret; 301 } 302 303 /** 304 * Set up environment to talk with a vhost kernel backend. 305 * 306 * @return 307 * - (-1) if fail to set up; 308 * - (>=0) if successful. 309 */ 310 static int 311 vhost_kernel_setup(struct virtio_user_dev *dev) 312 { 313 int vhostfd; 314 uint32_t i; 315 316 get_vhost_kernel_max_regions(); 317 318 for (i = 0; i < dev->max_queue_pairs; ++i) { 319 vhostfd = open(dev->path, O_RDWR); 320 if (vhostfd < 0) { 321 PMD_DRV_LOG(ERR, "fail to open %s, %s", 322 dev->path, strerror(errno)); 323 return -1; 324 } 325 326 dev->vhostfds[i] = vhostfd; 327 } 328 329 return 0; 330 } 331 332 static int 333 vhost_kernel_set_backend(int vhostfd, int tapfd) 334 { 335 struct vhost_vring_file f; 336 337 f.fd = tapfd; 338 f.index = 0; 339 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 340 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 341 strerror(errno)); 342 return -1; 343 } 344 345 f.index = 1; 346 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 347 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 348 strerror(errno)); 349 return -1; 350 } 351 352 return 0; 353 } 354 355 static int 356 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 357 uint16_t pair_idx, 358 int enable) 359 { 360 int hdr_size; 361 int vhostfd; 362 int tapfd; 363 int req_mq = (dev->max_queue_pairs > 1); 364 365 vhostfd = dev->vhostfds[pair_idx]; 366 367 if (!enable) { 368 if (dev->tapfds[pair_idx] >= 0) { 369 close(dev->tapfds[pair_idx]); 370 dev->tapfds[pair_idx] = -1; 371 } 372 return vhost_kernel_set_backend(vhostfd, -1); 373 } else if (dev->tapfds[pair_idx] >= 0) { 374 return 0; 375 } 376 377 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 378 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 379 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 380 else 381 hdr_size = sizeof(struct virtio_net_hdr); 382 383 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); 384 if (tapfd < 0) { 385 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 386 return -1; 387 } 388 389 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 390 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 391 close(tapfd); 392 return -1; 393 } 394 395 dev->tapfds[pair_idx] = tapfd; 396 return 0; 397 } 398 399 struct virtio_user_backend_ops ops_kernel = { 400 .setup = vhost_kernel_setup, 401 .send_request = vhost_kernel_ioctl, 402 .enable_qp = vhost_kernel_enable_queue_pair 403 }; 404