1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation 3 */ 4 5 #include <sys/types.h> 6 #include <sys/stat.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 10 #include <rte_memory.h> 11 12 #include "vhost.h" 13 #include "virtio_user_dev.h" 14 #include "vhost_kernel_tap.h" 15 16 struct vhost_memory_kernel { 17 uint32_t nregions; 18 uint32_t padding; 19 struct vhost_memory_region regions[0]; 20 }; 21 22 /* vhost kernel ioctls */ 23 #define VHOST_VIRTIO 0xAF 24 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 25 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 26 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 27 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 28 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 29 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 30 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 31 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 32 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 33 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 34 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 35 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 36 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 37 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 38 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 39 40 static uint64_t max_regions = 64; 41 42 static void 43 get_vhost_kernel_max_regions(void) 44 { 45 int fd; 46 char buf[20] = {'\0'}; 47 48 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 49 if (fd < 0) 50 return; 51 52 if (read(fd, buf, sizeof(buf) - 1) > 0) 53 max_regions = strtoull(buf, NULL, 10); 54 55 close(fd); 56 } 57 58 static uint64_t vhost_req_user_to_kernel[] = { 59 [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 60 [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 61 [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 62 [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 63 [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 64 [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 65 [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 66 [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 67 [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 68 [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 69 [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 70 }; 71 72 static int 73 add_memseg_list(const struct rte_memseg_list *msl, void *arg) 74 { 75 struct vhost_memory_kernel *vm = arg; 76 struct vhost_memory_region *mr; 77 void *start_addr; 78 uint64_t len; 79 80 if (msl->external) 81 return 0; 82 83 if (vm->nregions >= max_regions) 84 return -1; 85 86 start_addr = msl->base_va; 87 len = msl->page_sz * msl->memseg_arr.len; 88 89 mr = &vm->regions[vm->nregions++]; 90 91 mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr; 92 mr->userspace_addr = (uint64_t)(uintptr_t)start_addr; 93 mr->memory_size = len; 94 mr->mmap_offset = 0; /* flags_padding */ 95 96 PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64, 97 vm->nregions - 1, start_addr, len); 98 99 return 0; 100 } 101 102 /* By default, vhost kernel module allows 64 regions, but DPDK may 103 * have much more memory regions. Below function will treat each 104 * contiguous memory space reserved by DPDK as one region. 105 */ 106 static struct vhost_memory_kernel * 107 prepare_vhost_memory_kernel(void) 108 { 109 struct vhost_memory_kernel *vm; 110 111 vm = malloc(sizeof(struct vhost_memory_kernel) + 112 max_regions * 113 sizeof(struct vhost_memory_region)); 114 if (!vm) 115 return NULL; 116 117 vm->nregions = 0; 118 vm->padding = 0; 119 120 /* 121 * The memory lock has already been taken by memory subsystem 122 * or virtio_user_start_device(). 123 */ 124 if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) { 125 free(vm); 126 return NULL; 127 } 128 129 return vm; 130 } 131 132 /* with below features, vhost kernel does not need to do the checksum and TSO, 133 * these info will be passed to virtio_user through virtio net header. 134 */ 135 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 136 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 137 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 138 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 139 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 140 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 141 142 /* with below features, when flows from virtio_user to vhost kernel 143 * (1) if flows goes up through the kernel networking stack, it does not need 144 * to verify checksum, which can save CPU cycles; 145 * (2) if flows goes through a Linux bridge and outside from an interface 146 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 147 * offloaded into real physical device. 148 */ 149 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 150 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 151 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 152 (1ULL << VIRTIO_NET_F_CSUM)) 153 154 static unsigned int 155 tap_support_features(void) 156 { 157 int tapfd; 158 unsigned int tap_features; 159 160 tapfd = open(PATH_NET_TUN, O_RDWR); 161 if (tapfd < 0) { 162 PMD_DRV_LOG(ERR, "fail to open %s: %s", 163 PATH_NET_TUN, strerror(errno)); 164 return -1; 165 } 166 167 if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 168 PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 169 close(tapfd); 170 return -1; 171 } 172 173 close(tapfd); 174 return tap_features; 175 } 176 177 static int 178 vhost_kernel_ioctl(struct virtio_user_dev *dev, 179 enum vhost_user_request req, 180 void *arg) 181 { 182 int ret = -1; 183 unsigned int i; 184 uint64_t req_kernel; 185 struct vhost_memory_kernel *vm = NULL; 186 int vhostfd; 187 unsigned int queue_sel; 188 unsigned int features; 189 190 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 191 192 req_kernel = vhost_req_user_to_kernel[req]; 193 194 if (req_kernel == VHOST_SET_MEM_TABLE) { 195 vm = prepare_vhost_memory_kernel(); 196 if (!vm) 197 return -1; 198 arg = (void *)vm; 199 } 200 201 if (req_kernel == VHOST_SET_FEATURES) { 202 /* We don't need memory protection here */ 203 *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 204 205 /* VHOST kernel does not know about below flags */ 206 *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 207 *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 208 209 *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 210 } 211 212 switch (req_kernel) { 213 case VHOST_SET_VRING_NUM: 214 case VHOST_SET_VRING_ADDR: 215 case VHOST_SET_VRING_BASE: 216 case VHOST_GET_VRING_BASE: 217 case VHOST_SET_VRING_KICK: 218 case VHOST_SET_VRING_CALL: 219 queue_sel = *(unsigned int *)arg; 220 vhostfd = dev->vhostfds[queue_sel / 2]; 221 *(unsigned int *)arg = queue_sel % 2; 222 PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 223 vhostfd, *(unsigned int *)arg); 224 break; 225 default: 226 vhostfd = -1; 227 } 228 if (vhostfd == -1) { 229 for (i = 0; i < dev->max_queue_pairs; ++i) { 230 if (dev->vhostfds[i] < 0) 231 continue; 232 233 ret = ioctl(dev->vhostfds[i], req_kernel, arg); 234 if (ret < 0) 235 break; 236 } 237 } else { 238 ret = ioctl(vhostfd, req_kernel, arg); 239 } 240 241 if (!ret && req_kernel == VHOST_GET_FEATURES) { 242 features = tap_support_features(); 243 /* with tap as the backend, all these features are supported 244 * but not claimed by vhost-net, so we add them back when 245 * reporting to upper layer. 246 */ 247 if (features & IFF_VNET_HDR) { 248 *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 249 *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 250 } 251 252 /* vhost_kernel will not declare this feature, but it does 253 * support multi-queue. 254 */ 255 if (features & IFF_MULTI_QUEUE) 256 *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 257 } 258 259 if (vm) 260 free(vm); 261 262 if (ret < 0) 263 PMD_DRV_LOG(ERR, "%s failed: %s", 264 vhost_msg_strings[req], strerror(errno)); 265 266 return ret; 267 } 268 269 /** 270 * Set up environment to talk with a vhost kernel backend. 271 * 272 * @return 273 * - (-1) if fail to set up; 274 * - (>=0) if successful. 275 */ 276 static int 277 vhost_kernel_setup(struct virtio_user_dev *dev) 278 { 279 int vhostfd; 280 uint32_t i; 281 282 get_vhost_kernel_max_regions(); 283 284 for (i = 0; i < dev->max_queue_pairs; ++i) { 285 vhostfd = open(dev->path, O_RDWR); 286 if (vhostfd < 0) { 287 PMD_DRV_LOG(ERR, "fail to open %s, %s", 288 dev->path, strerror(errno)); 289 return -1; 290 } 291 292 dev->vhostfds[i] = vhostfd; 293 } 294 295 return 0; 296 } 297 298 static int 299 vhost_kernel_set_backend(int vhostfd, int tapfd) 300 { 301 struct vhost_vring_file f; 302 303 f.fd = tapfd; 304 f.index = 0; 305 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 306 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 307 strerror(errno)); 308 return -1; 309 } 310 311 f.index = 1; 312 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 313 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 314 strerror(errno)); 315 return -1; 316 } 317 318 return 0; 319 } 320 321 static int 322 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 323 uint16_t pair_idx, 324 int enable) 325 { 326 int hdr_size; 327 int vhostfd; 328 int tapfd; 329 int req_mq = (dev->max_queue_pairs > 1); 330 331 vhostfd = dev->vhostfds[pair_idx]; 332 333 if (!enable) { 334 if (dev->tapfds[pair_idx] >= 0) { 335 close(dev->tapfds[pair_idx]); 336 dev->tapfds[pair_idx] = -1; 337 } 338 return vhost_kernel_set_backend(vhostfd, -1); 339 } else if (dev->tapfds[pair_idx] >= 0) { 340 return 0; 341 } 342 343 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 344 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 345 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 346 else 347 hdr_size = sizeof(struct virtio_net_hdr); 348 349 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, 350 (char *)dev->mac_addr, dev->features); 351 if (tapfd < 0) { 352 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 353 return -1; 354 } 355 356 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 357 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 358 close(tapfd); 359 return -1; 360 } 361 362 dev->tapfds[pair_idx] = tapfd; 363 return 0; 364 } 365 366 struct virtio_user_backend_ops virtio_ops_kernel = { 367 .setup = vhost_kernel_setup, 368 .send_request = vhost_kernel_ioctl, 369 .enable_qp = vhost_kernel_enable_queue_pair 370 }; 371