/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Intel Corporation
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <rte_memory.h>
#include <rte_eal_memconfig.h>

#include "vhost.h"
#include "virtio_user_dev.h"
#include "vhost_kernel_tap.h"

struct vhost_memory_kernel {
	uint32_t nregions;
	uint32_t padding;
	struct vhost_memory_region regions[0];
};

/* vhost kernel ioctls */
#define VHOST_VIRTIO 0xAF
#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)

/* Default limit of the vhost kernel module; may be raised through the
 * max_mem_regions module parameter, which is probed below.
 */
static uint64_t max_regions = 64;

static void
get_vhost_kernel_max_regions(void)
{
	int fd;
	char buf[20] = {'\0'};

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	if (read(fd, buf, sizeof(buf) - 1) > 0)
		max_regions = strtoull(buf, NULL, 10);

	close(fd);
}

/* Map vhost-user request types onto the corresponding kernel ioctls. */
static uint64_t vhost_req_user_to_kernel[] = {
	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
};

struct walk_arg {
	struct vhost_memory_kernel *vm;
	uint32_t region_nr;
};

static int
add_memory_region(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, size_t len, void *arg)
{
	struct walk_arg *wa = arg;
	struct vhost_memory_region *mr;
	void *start_addr;

	if (wa->region_nr >= max_regions)
		return -1;

	mr = &wa->vm->regions[wa->region_nr++];
	start_addr = ms->addr;

	/* virtio_user runs in the same process address space that vhost-net
	 * maps, so the "guest physical" address is simply the process
	 * virtual address.
	 */
	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
	mr->memory_size = len;
	mr->mmap_offset = 0;

	return 0;
}
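/* Illustrative only: the table handed to VHOST_SET_MEM_TABLE ends up as a
 * header followed by nregions identity-mapped entries, e.g. (the addresses
 * and sizes here are made up):
 *
 *   nregions = 2, padding = 0
 *   regions[0] = { guest_phys_addr = 0x7f0000000000,
 *                  userspace_addr  = 0x7f0000000000,
 *                  memory_size = 1 GiB, mmap_offset = 0 }
 *   regions[1] = { ...next VA-contiguous chunk... }
 */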
/* By default, the vhost kernel module allows only 64 regions, whereas DPDK
 * may have up to 256 memory segments. To stay within the limit, the function
 * below merges virtually adjacent memsegs into a single region:
 * rte_memseg_contig_walk() invokes the callback once per VA-contiguous
 * chunk rather than once per memseg.
 */
static struct vhost_memory_kernel *
prepare_vhost_memory_kernel(void)
{
	struct vhost_memory_kernel *vm;
	struct walk_arg wa;

	vm = malloc(sizeof(struct vhost_memory_kernel) +
			max_regions *
			sizeof(struct vhost_memory_region));
	if (!vm)
		return NULL;

	wa.region_nr = 0;
	wa.vm = vm;

	if (rte_memseg_contig_walk(add_memory_region, &wa) < 0) {
		free(vm);
		return NULL;
	}

	vm->nregions = wa.region_nr;
	vm->padding = 0;
	return vm;
}

/* With the features below, the vhost kernel backend does not need to
 * perform checksum and TSO itself; this information is passed to
 * virtio_user through the virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))

/* With the features below, for traffic flowing from virtio_user to the
 * vhost kernel backend:
 * (1) if the flow goes up through the kernel networking stack, the kernel
 *     does not need to verify the checksum, which saves CPU cycles;
 * (2) if the flow goes through a Linux bridge and out via a physical
 *     interface (kernel driver), checksum and TSO are handled by GSO in
 *     the kernel, or even offloaded to the physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_CSUM))

/* Check whether the tap device supports multi-queue (IFF_MULTI_QUEUE). */
static int
tap_support_mq(void)
{
	int tapfd;
	unsigned int tap_features;

	tapfd = open(PATH_NET_TUN, O_RDWR);
	if (tapfd < 0) {
		PMD_DRV_LOG(ERR, "failed to open %s: %s",
			    PATH_NET_TUN, strerror(errno));
		return -1;
	}

	if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
		PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
		close(tapfd);
		return -1;
	}

	close(tapfd);
	return tap_features & IFF_MULTI_QUEUE;
}

static int
vhost_kernel_ioctl(struct virtio_user_dev *dev,
		   enum vhost_user_request req,
		   void *arg)
{
	int ret = -1;
	unsigned int i;
	uint64_t req_kernel;
	struct vhost_memory_kernel *vm = NULL;
	int vhostfd;
	unsigned int queue_sel;

	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);

	req_kernel = vhost_req_user_to_kernel[req];

	if (req_kernel == VHOST_SET_MEM_TABLE) {
		vm = prepare_vhost_memory_kernel();
		if (!vm)
			return -1;
		arg = (void *)vm;
	}

	if (req_kernel == VHOST_SET_FEATURES) {
		/* We don't need memory protection here */
		*(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);

		/* The vhost kernel backend does not know about the flags below */
		*(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
		*(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;

		/* Multi-queue is provided by the tap backend, not vhost-net */
		*(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
	}

	switch (req_kernel) {
	case VHOST_SET_VRING_NUM:
	case VHOST_SET_VRING_ADDR:
	case VHOST_SET_VRING_BASE:
	case VHOST_GET_VRING_BASE:
	case VHOST_SET_VRING_KICK:
	case VHOST_SET_VRING_CALL:
		/* queue_sel / 2 selects the queue pair (and thus the vhost
		 * fd); queue_sel % 2 selects the ring within that pair.
		 */
		queue_sel = *(unsigned int *)arg;
		vhostfd = dev->vhostfds[queue_sel / 2];
		*(unsigned int *)arg = queue_sel % 2;
		PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
			    vhostfd, *(unsigned int *)arg);
		break;
	default:
		vhostfd = -1;
	}
	if (vhostfd == -1) {
		for (i = 0; i < dev->max_queue_pairs; ++i) {
			if (dev->vhostfds[i] < 0)
				continue;

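			/* Requests that are not tied to a specific vring
			 * (e.g. VHOST_SET_OWNER, VHOST_SET_FEATURES,
			 * VHOST_SET_MEM_TABLE) are replayed on every
			 * per-queue-pair vhost fd, since each fd is an
			 * independent vhost-net instance.
			 */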
			ret = ioctl(dev->vhostfds[i], req_kernel, arg);
			if (ret < 0)
				break;
		}
	} else {
		ret = ioctl(vhostfd, req_kernel, arg);
	}

	if (!ret && req_kernel == VHOST_GET_FEATURES) {
		/* With tap as the backend, all these features are supported
		 * but not advertised by vhost-net, so add them back when
		 * reporting to the upper layer.
		 */
		*((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
		*((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;

		/* vhost-net does not declare this feature itself, but the
		 * tap backend does support multi-queue.
		 */
		if (tap_support_mq())
			*(uint64_t *)arg |= (1ULL << VIRTIO_NET_F_MQ);
	}

	free(vm);

	if (ret < 0)
		PMD_DRV_LOG(ERR, "%s failed: %s",
			    vhost_msg_strings[req], strerror(errno));

	return ret;
}

/**
 * Set up the environment to talk to a vhost kernel backend: one vhost-net
 * fd is opened per queue pair.
 *
 * @return
 *   - (-1) if setup fails;
 *   - 0 on success.
 */
static int
vhost_kernel_setup(struct virtio_user_dev *dev)
{
	int vhostfd;
	uint32_t i;

	get_vhost_kernel_max_regions();

	for (i = 0; i < dev->max_queue_pairs; ++i) {
		vhostfd = open(dev->path, O_RDWR);
		if (vhostfd < 0) {
			PMD_DRV_LOG(ERR, "failed to open %s: %s",
				    dev->path, strerror(errno));
			return -1;
		}

		dev->vhostfds[i] = vhostfd;
	}

	return 0;
}

/* Attach (or, with tapfd == -1, detach) a tap fd as the backend of both
 * vrings (index 0 and index 1) of a vhost-net instance.
 */
static int
vhost_kernel_set_backend(int vhostfd, int tapfd)
{
	struct vhost_vring_file f;

	f.fd = tapfd;
	f.index = 0;
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
			    strerror(errno));
		return -1;
	}

	f.index = 1;
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
			    strerror(errno));
		return -1;
	}

	return 0;
}

static int
vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
			       uint16_t pair_idx,
			       int enable)
{
	int hdr_size;
	int vhostfd;
	int tapfd;
	int req_mq = (dev->max_queue_pairs > 1);

	vhostfd = dev->vhostfds[pair_idx];

	if (!enable) {
		if (dev->tapfds[pair_idx] >= 0) {
			close(dev->tapfds[pair_idx]);
			dev->tapfds[pair_idx] = -1;
		}
		return vhost_kernel_set_backend(vhostfd, -1);
	} else if (dev->tapfds[pair_idx] >= 0) {
		return 0;
	}

	/* Mergeable RX buffers and VIRTIO 1.0 both use the larger vnet
	 * header that carries num_buffers.
	 */
	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hdr_size = sizeof(struct virtio_net_hdr);

	tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq,
				      (char *)dev->mac_addr);
	if (tapfd < 0) {
		PMD_DRV_LOG(ERR, "failed to open tap device for vhost kernel");
		return -1;
	}

	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
		PMD_DRV_LOG(ERR, "failed to set backend for vhost kernel");
		close(tapfd);
		return -1;
	}

	dev->tapfds[pair_idx] = tapfd;
	return 0;
}

/* Backend callbacks used by the generic virtio_user device layer. */
struct virtio_user_backend_ops ops_kernel = {
	.setup = vhost_kernel_setup,
	.send_request = vhost_kernel_ioctl,
	.enable_qp = vhost_kernel_enable_queue_pair
};