/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Intel Corporation
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

#include <rte_memory.h>

#include "vhost.h"
#include "virtio_user_dev.h"
#include "vhost_kernel_tap.h"

struct vhost_kernel_data {
	int *vhostfds;
	int *tapfds;
};

struct vhost_memory_kernel {
	uint32_t nregions;
	uint32_t padding;
	struct vhost_memory_region regions[];
};

/* vhost kernel ioctls */
#define VHOST_VIRTIO 0xAF
#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)

/* With the features below, the vhost kernel module does not need to do
 * checksum or TSO; this information is passed to virtio_user through the
 * virtio net header.
 */
#define VHOST_KERNEL_GUEST_OFFLOADS_MASK	\
	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
	 (1ULL << VIRTIO_NET_F_GUEST_UFO))

/* With the features below, when a flow goes from virtio_user to vhost kernel:
 * (1) if the flow goes up through the kernel networking stack, the stack does
 *     not need to verify the checksum, which saves CPU cycles;
 * (2) if the flow goes through a Linux bridge and out through an interface
 *     (kernel driver), checksum and TSO are done by GSO in the kernel, or
 *     even offloaded to the real physical device.
 */
#define VHOST_KERNEL_HOST_OFFLOADS_MASK		\
	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
	 (1ULL << VIRTIO_NET_F_CSUM))
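/* Note: these two masks are a convention internal to this backend. The
 * flags are stripped from the feature set before VHOST_SET_FEATURES
 * (vhost-net itself does not know them) and added back after
 * VHOST_GET_FEATURES whenever the underlying TAP device supports
 * IFF_VNET_HDR; see vhost_kernel_get_features() and
 * vhost_kernel_set_features() below.
 */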

static uint64_t max_regions = 64;

static void
get_vhost_kernel_max_regions(void)
{
	int fd;
	char buf[20] = {'\0'};

	fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
	if (fd < 0)
		return;

	if (read(fd, buf, sizeof(buf) - 1) > 0)
		max_regions = strtoull(buf, NULL, 10);

	close(fd);
}

static int
vhost_kernel_ioctl(int fd, uint64_t request, void *arg)
{
	int ret;

	ret = ioctl(fd, request, arg);
	if (ret) {
		PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %" PRIu64 " failed (%s)",
				request, strerror(errno));
		return -1;
	}

	return 0;
}

static int
vhost_kernel_set_owner(struct virtio_user_dev *dev)
{
	struct vhost_kernel_data *data = dev->backend_data;

	return vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_OWNER, NULL);
}

static int
vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
{
	int ret;
	unsigned int tap_features;
	struct vhost_kernel_data *data = dev->backend_data;

	ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Failed to get features");
		return -1;
	}

	ret = tap_support_features(&tap_features);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Failed to get TAP features");
		return -1;
	}

	/* With TAP as the backend, all these features are supported
	 * but not claimed by vhost-net, so we add them back when
	 * reporting to the upper layer.
	 */
	if (tap_features & IFF_VNET_HDR) {
		*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
		*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
	}

	/* vhost_kernel does not declare this feature, but it does
	 * support multi-queue.
	 */
	if (tap_features & IFF_MULTI_QUEUE)
		*features |= (1ULL << VIRTIO_NET_F_MQ);

	return 0;
}

static int
vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features)
{
	struct vhost_kernel_data *data = dev->backend_data;

	/* We don't need memory protection here */
	features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	/* The vhost kernel module does not know about the flags below */
	features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
	features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
	features &= ~(1ULL << VIRTIO_NET_F_MQ);

	return vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_FEATURES, &features);
}
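/* Callback for rte_memseg_list_walk_thread_unsafe(): registers one vhost
 * memory region per memseg list. guest_phys_addr and userspace_addr are
 * deliberately identical: virtio_user has no real guest, the "guest"
 * address space is the DPDK process itself, so a 1:1 mapping is all
 * vhost-net needs to translate ring and buffer addresses.
 */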
static int
add_memseg_list(const struct rte_memseg_list *msl, void *arg)
{
	struct vhost_memory_kernel *vm = arg;
	struct vhost_memory_region *mr;
	void *start_addr;
	uint64_t len;

	if (msl->external)
		return 0;

	if (vm->nregions >= max_regions)
		return -1;

	start_addr = msl->base_va;
	len = msl->page_sz * msl->memseg_arr.len;

	mr = &vm->regions[vm->nregions++];

	mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
	mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
	mr->memory_size = len;
	mr->mmap_offset = 0; /* flags_padding */

	PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
			vm->nregions - 1, start_addr, len);

	return 0;
}

/* By default, the vhost kernel module allows 64 regions, but DPDK may
 * have many more memory regions. The function below treats each
 * contiguous memory space reserved by DPDK as one region.
 */
static int
vhost_kernel_set_memory_table(struct virtio_user_dev *dev)
{
	struct vhost_kernel_data *data = dev->backend_data;
	struct vhost_memory_kernel *vm;
	int ret;

	vm = malloc(sizeof(struct vhost_memory_kernel) +
			max_regions *
			sizeof(struct vhost_memory_region));
	if (!vm)
		goto err;

	vm->nregions = 0;
	vm->padding = 0;

	/*
	 * The memory lock has already been taken by the memory subsystem
	 * or virtio_user_start_device().
	 */
	ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm);
	if (ret < 0)
		goto err_free;

	ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_MEM_TABLE, vm);
	if (ret < 0)
		goto err_free;

	free(vm);

	return 0;
err_free:
	free(vm);
err:
	PMD_DRV_LOG(ERR, "Failed to set memory table");
	return -1;
}
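/* vhost-net exposes exactly one RX/TX vring pair per device fd, while
 * virtio_user numbers queues globally (2N = RX, 2N + 1 = TX for queue
 * pair N). The vring helpers below therefore map global queue index i to
 * fd vhostfds[i / 2] with a per-device ring index of i % 2, and restore
 * the caller's index before returning.
 */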
static int
vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state)
{
	int ret, fd;
	unsigned int index = state->index;
	struct vhost_kernel_data *data = dev->backend_data;

	/* Convert from queue index to queue-pair & offset */
	fd = data->vhostfds[state->index / 2];
	state->index %= 2;

	ret = vhost_kernel_ioctl(fd, req, state);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req);
		return -1;
	}

	/* Restore the index back to the queue index */
	state->index = index;

	return 0;
}

static int
vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state);
}

static int
vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
	return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state);
}

static int
vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
{
	return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state);
}

static int
vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req,
		struct vhost_vring_file *file)
{
	int ret, fd;
	unsigned int index = file->index;
	struct vhost_kernel_data *data = dev->backend_data;

	/* Convert from queue index to queue-pair & offset */
	fd = data->vhostfds[file->index / 2];
	file->index %= 2;

	ret = vhost_kernel_ioctl(fd, req, file);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req);
		return -1;
	}

	/* Restore the index back to the queue index */
	file->index = index;

	return 0;
}

static int
vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
{
	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file);
}

static int
vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
{
	return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file);
}

static int
vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
{
	int ret, fd;
	unsigned int index = addr->index;
	struct vhost_kernel_data *data = dev->backend_data;

	/* Convert from queue index to queue-pair & offset */
	fd = data->vhostfds[addr->index / 2];
	addr->index %= 2;

	ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Failed to set vring address");
		return -1;
	}

	/* Restore the index back to the queue index */
	addr->index = index;

	return 0;
}

static int
vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused)
{
	return -ENOTSUP;
}

static int
vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused)
{
	return -ENOTSUP;
}

/**
 * Set up the environment to talk to a vhost kernel backend.
 *
 * @return
 *   - (-1) if setup failed;
 *   - (0) if successful.
 */
static int
vhost_kernel_setup(struct virtio_user_dev *dev)
{
	int vhostfd;
	uint32_t q, i;
	struct vhost_kernel_data *data;

	data = malloc(sizeof(*data));
	if (!data) {
		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path);
		return -1;
	}

	data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
	if (!data->vhostfds) {
		PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path);
		goto err_data;
	}
	data->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
	if (!data->tapfds) {
		PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path);
		goto err_vhostfds;
	}

	for (q = 0; q < dev->max_queue_pairs; ++q) {
		data->vhostfds[q] = -1;
		data->tapfds[q] = -1;
	}

	get_vhost_kernel_max_regions();

	for (i = 0; i < dev->max_queue_pairs; ++i) {
		vhostfd = open(dev->path, O_RDWR);
		if (vhostfd < 0) {
			PMD_DRV_LOG(ERR, "Failed to open %s: %s", dev->path, strerror(errno));
			goto err_tapfds;
		}

		data->vhostfds[i] = vhostfd;
	}

	dev->backend_data = data;

	return 0;

err_tapfds:
	for (i = 0; i < dev->max_queue_pairs; i++)
		if (data->vhostfds[i] >= 0)
			close(data->vhostfds[i]);

	free(data->tapfds);
err_vhostfds:
	free(data->vhostfds);
err_data:
	free(data);

	return -1;
}

static int
vhost_kernel_destroy(struct virtio_user_dev *dev)
{
	struct vhost_kernel_data *data = dev->backend_data;
	uint32_t i;

	if (!data)
		return 0;

	for (i = 0; i < dev->max_queue_pairs; ++i) {
		if (data->vhostfds[i] >= 0)
			close(data->vhostfds[i]);
		if (data->tapfds[i] >= 0)
			close(data->tapfds[i]);
	}

	free(data->vhostfds);
	free(data->tapfds);
	free(data);
	dev->backend_data = NULL;

	return 0;
}

static int
vhost_kernel_set_backend(int vhostfd, int tapfd)
{
	struct vhost_vring_file f;

	f.fd = tapfd;
	f.index = 0; /* RX ring */
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
				strerror(errno));
		return -1;
	}

	f.index = 1; /* TX ring */
	if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
		PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND failed: %s",
				strerror(errno));
		return -1;
	}

	return 0;
}
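/* Enable or disable one queue pair. Disabling detaches the TAP backend
 * from the vhost fd (and, in multi-queue mode, also disables the TAP
 * queue via vhost_kernel_tap_set_queue()). Enabling reuses the TAP fd
 * already created for this pair when there is one; otherwise it opens a
 * new TAP device with the proper virtio-net header size, then attaches
 * it as the vhost-net backend.
 */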
static int
vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
		uint16_t pair_idx,
		int enable)
{
	int hdr_size;
	int vhostfd;
	int tapfd;
	int req_mq = (dev->max_queue_pairs > 1);
	struct vhost_kernel_data *data = dev->backend_data;

	vhostfd = data->vhostfds[pair_idx];

	if (dev->qp_enabled[pair_idx] == enable)
		return 0;

	if (!enable) {
		tapfd = data->tapfds[pair_idx];
		if (vhost_kernel_set_backend(vhostfd, -1) < 0) {
			PMD_DRV_LOG(ERR, "Failed to set backend for vhost kernel");
			return -1;
		}
		if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) {
			PMD_DRV_LOG(ERR, "Failed to disable TAP for vhost kernel");
			return -1;
		}
		dev->qp_enabled[pair_idx] = false;
		return 0;
	}

	if (data->tapfds[pair_idx] >= 0) {
		tapfd = data->tapfds[pair_idx];
		if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1)
			return -1;
		if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) {
			PMD_DRV_LOG(ERR, "Failed to enable TAP for vhost kernel");
			return -1;
		}
		goto set_backend;
	}

	if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
	    (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
		hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hdr_size = sizeof(struct virtio_net_hdr);

	tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq,
			(char *)dev->mac_addr, dev->features);
	if (tapfd < 0) {
		PMD_DRV_LOG(ERR, "Failed to open TAP for vhost kernel");
		return -1;
	}

	data->tapfds[pair_idx] = tapfd;

set_backend:
	if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
		PMD_DRV_LOG(ERR, "Failed to set backend for vhost kernel");
		return -1;
	}

	dev->qp_enabled[pair_idx] = true;
	return 0;
}

static int
vhost_kernel_get_backend_features(uint64_t *features)
{
	*features = 0;

	return 0;
}

static int
vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused)
{
	/* Nothing to update (maybe get TAP interface link state?) */
	return 0;
}

static int
vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused)
{
	/* No link state interrupt with vhost-kernel */
	return -1;
}

struct virtio_user_backend_ops virtio_ops_kernel = {
	.setup = vhost_kernel_setup,
	.destroy = vhost_kernel_destroy,
	.get_backend_features = vhost_kernel_get_backend_features,
	.set_owner = vhost_kernel_set_owner,
	.get_features = vhost_kernel_get_features,
	.set_features = vhost_kernel_set_features,
	.set_memory_table = vhost_kernel_set_memory_table,
	.set_vring_num = vhost_kernel_set_vring_num,
	.set_vring_base = vhost_kernel_set_vring_base,
	.get_vring_base = vhost_kernel_get_vring_base,
	.set_vring_call = vhost_kernel_set_vring_call,
	.set_vring_kick = vhost_kernel_set_vring_kick,
	.set_vring_addr = vhost_kernel_set_vring_addr,
	.get_status = vhost_kernel_get_status,
	.set_status = vhost_kernel_set_status,
	.enable_qp = vhost_kernel_enable_queue_pair,
	.update_link_state = vhost_kernel_update_link_state,
	.get_intr_fd = vhost_kernel_get_intr_fd,
};