/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023 Red Hat, Inc.
 */

#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include <linux/vduse.h>
#include <linux/virtio_net.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_thread.h>

#include "fd_man.h"
#include "iotlb.h"
#include "vduse.h"
#include "vhost.h"
#include "virtio_net_ctrl.h"

#define VHOST_VDUSE_API_VERSION 0
#define VDUSE_CTRL_PATH "/dev/vduse/control"

struct vduse {
	struct fdset *fdset;
};

static struct vduse vduse;

static const char * const vduse_reqs_str[] = {
	"VDUSE_GET_VQ_STATE",
	"VDUSE_SET_STATUS",
	"VDUSE_UPDATE_IOTLB",
};

#define vduse_req_id_to_str(id) \
	(id < RTE_DIM(vduse_reqs_str) ? \
	vduse_reqs_str[id] : "Unknown")

static int
vduse_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	return ioctl(dev->vduse_dev_fd, VDUSE_VQ_INJECT_IRQ, &vq->index);
}

static void
vduse_iotlb_remove_notify(uint64_t addr, uint64_t offset, uint64_t size)
{
	munmap((void *)(uintptr_t)addr, offset + size);
}

/* Resolve an IOTLB miss: ask the kernel for the fd backing the faulting IOVA,
 * mmap() it and insert the translation into the IOTLB cache.
 */
static int
vduse_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm __rte_unused)
{
	struct vduse_iotlb_entry entry;
	uint64_t size, page_size;
	struct stat stat;
	void *mmap_addr;
	int fd, ret;

	entry.start = iova;
	entry.last = iova + 1;

	ret = ioctl(dev->vduse_dev_fd, VDUSE_IOTLB_GET_FD, &entry);
	if (ret < 0) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get IOTLB entry for 0x%" PRIx64,
				iova);
		return -1;
	}

	fd = ret;

	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "New IOTLB entry:");
	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tIOVA: %" PRIx64 " - %" PRIx64,
			(uint64_t)entry.start, (uint64_t)entry.last);
	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\toffset: %" PRIx64, (uint64_t)entry.offset);
	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tfd: %d", fd);
	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "\tperm: %x", entry.perm);

	size = entry.last - entry.start + 1;
	mmap_addr = mmap(0, size + entry.offset, entry.perm, MAP_SHARED, fd, 0);
	if (mmap_addr == MAP_FAILED) {
		VHOST_CONFIG_LOG(dev->ifname, ERR,
				"Failed to mmap IOTLB entry for 0x%" PRIx64, iova);
		ret = -1;
		goto close_fd;
	}

	ret = fstat(fd, &stat);
	if (ret < 0) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get page size.");
		munmap(mmap_addr, entry.offset + size);
		goto close_fd;
	}
	page_size = (uint64_t)stat.st_blksize;

	vhost_user_iotlb_cache_insert(dev, entry.start, (uint64_t)(uintptr_t)mmap_addr,
			entry.offset, size, page_size, entry.perm);

	ret = 0;
close_fd:
	close(fd);

	return ret;
}

static struct vhost_backend_ops vduse_backend_ops = {
	.iotlb_miss = vduse_iotlb_miss,
	.iotlb_remove_notify = vduse_iotlb_remove_notify,
	.inject_irq = vduse_inject_irq,
};

static void
vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
{
	struct virtio_net *dev = arg;
	uint64_t buf;
	int ret;

	ret = read(fd, &buf, sizeof(buf));
	if (ret < 0) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read control queue event: %s",
				strerror(errno));
		return;
	}

	VHOST_CONFIG_LOG(dev->ifname, DEBUG, "Control queue kicked");
	if (virtio_net_ctrl_handle(dev))
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to handle ctrl request");
}

static void
vduse_vring_setup(struct virtio_net *dev, unsigned int index)
{
	struct vhost_virtqueue *vq = dev->virtqueue[index];
	struct vhost_vring_addr *ra = &vq->ring_addrs;
	struct vduse_vq_info vq_info;
	struct vduse_vq_eventfd vq_efd;
	int ret;

	vq_info.index = index;
	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_GET_INFO, &vq_info);
	if (ret) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get VQ %u info: %s",
				index, strerror(errno));
		return;
	}

	VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
			(unsigned long long)vq_info.desc_addr);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdriver_addr: %llx",
			(unsigned long long)vq_info.driver_addr);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdevice_addr: %llx",
			(unsigned long long)vq_info.device_addr);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);

	vq->last_avail_idx = vq_info.split.avail_index;
	vq->size = vq_info.num;
	vq->ready = true;
	vq->enabled = vq_info.ready;
	ra->desc_user_addr = vq_info.desc_addr;
	ra->avail_user_addr = vq_info.driver_addr;
	ra->used_user_addr = vq_info.device_addr;

	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (vq->kickfd < 0) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
				index, strerror(errno));
		vq->kickfd = VIRTIO_INVALID_EVENTFD;
		return;
	}
	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tkick fd: %d", vq->kickfd);

	vq->shadow_used_split = rte_malloc_socket(NULL,
			vq->size * sizeof(struct vring_used_elem),
			RTE_CACHE_LINE_SIZE, 0);
	vq->batch_copy_elems = rte_malloc_socket(NULL,
			vq->size * sizeof(struct batch_copy_elem),
			RTE_CACHE_LINE_SIZE, 0);

	rte_rwlock_write_lock(&vq->access_lock);
	vhost_user_iotlb_rd_lock(vq);
	if (vring_translate(dev, vq))
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to translate vring %d addresses",
				index);

	if (vhost_enable_guest_notification(dev, vq, 0))
		VHOST_CONFIG_LOG(dev->ifname, ERR,
				"Failed to disable guest notifications on vring %d",
				index);
	vhost_user_iotlb_rd_unlock(vq);
	rte_rwlock_write_unlock(&vq->access_lock);

	vq_efd.index = index;
	vq_efd.fd = vq->kickfd;

	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
	if (ret) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to setup kickfd for VQ %u: %s",
				index, strerror(errno));
		close(vq->kickfd);
		vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
		return;
	}

	if (vq == dev->cvq) {
		ret = fdset_add(vduse.fdset, vq->kickfd, vduse_control_queue_event, NULL, dev);
		if (ret) {
			VHOST_CONFIG_LOG(dev->ifname, ERR,
					"Failed to setup kickfd handler for VQ %u: %s",
					index, strerror(errno));
			vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;
			ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
			close(vq->kickfd);
			vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
		}
		vhost_enable_guest_notification(dev, vq, 1);
		VHOST_CONFIG_LOG(dev->ifname, INFO, "Ctrl queue event handler installed");
	}
}

static void
vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
{
	struct vhost_virtqueue *vq = dev->virtqueue[index];
	struct vduse_vq_eventfd vq_efd;
	int ret;

	if (vq == dev->cvq && vq->kickfd >= 0)
		fdset_del(vduse.fdset, vq->kickfd);

	vq_efd.index = index;
	vq_efd.fd = VDUSE_EVENTFD_DEASSIGN;

	ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
	if (ret)
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to cleanup kickfd for VQ %u: %s",
				index, strerror(errno));

	close(vq->kickfd);
	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;

	rte_rwlock_write_lock(&vq->access_lock);
	vring_invalidate(dev, vq);
	rte_rwlock_write_unlock(&vq->access_lock);

	rte_free(vq->batch_copy_elems);
	vq->batch_copy_elems = NULL;

	rte_free(vq->shadow_used_split);
	vq->shadow_used_split = NULL;

	vq->enabled = false;
	vq->ready = false;
	vq->size = 0;
	vq->last_used_idx = 0;
	vq->last_avail_idx = 0;
}

static void
vduse_device_start(struct virtio_net *dev)
{
	unsigned int i, ret;

	VHOST_CONFIG_LOG(dev->ifname, INFO, "Starting device...");

	dev->notify_ops = vhost_driver_callback_get(dev->ifname);
	if (!dev->notify_ops) {
		VHOST_CONFIG_LOG(dev->ifname, ERR,
				"Failed to get callback ops for driver");
		return;
	}

	ret = ioctl(dev->vduse_dev_fd, VDUSE_DEV_GET_FEATURES, &dev->features);
	if (ret) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to get features: %s",
				strerror(errno));
		return;
	}

	VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
			dev->features);

	if (dev->features &
		((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
		 (1ULL << VIRTIO_F_VERSION_1) |
		 (1ULL << VIRTIO_F_RING_PACKED))) {
		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else {
		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
	}

	for (i = 0; i < dev->nr_vring; i++)
		vduse_vring_setup(dev, i);

	dev->flags |= VIRTIO_DEV_READY;

	if (dev->notify_ops->new_device(dev->vid) == 0)
		dev->flags |= VIRTIO_DEV_RUNNING;

	for (i = 0; i < dev->nr_vring; i++) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq == dev->cvq)
			continue;

		if (dev->notify_ops->vring_state_changed)
			dev->notify_ops->vring_state_changed(dev->vid, i, vq->enabled);
	}
}

static void
vduse_device_stop(struct virtio_net *dev)
{
	unsigned int i;

	VHOST_CONFIG_LOG(dev->ifname, INFO, "Stopping device...");

	vhost_destroy_device_notify(dev);

	dev->flags &= ~VIRTIO_DEV_READY;

	for (i = 0; i < dev->nr_vring; i++)
		vduse_vring_cleanup(dev, i);

	vhost_user_iotlb_flush_all(dev);
}

/* Handle device requests coming from the VDUSE kernel driver:
 * VQ state query, device status change and IOTLB invalidation.
 */
static void
vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
{
	struct virtio_net *dev = arg;
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct vhost_virtqueue *vq;
	uint8_t old_status = dev->status;
	int ret;

	memset(&resp, 0, sizeof(resp));

	ret = read(fd, &req, sizeof(req));
	if (ret < 0) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to read request: %s",
				strerror(errno));
		return;
	} else if (ret < (int)sizeof(req)) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Incomplete read of request: %d bytes", ret);
		return;
	}

	VHOST_CONFIG_LOG(dev->ifname, INFO, "New request: %s (%u)",
			vduse_req_id_to_str(req.type), req.type);

	switch (req.type) {
	case VDUSE_GET_VQ_STATE:
		vq = dev->virtqueue[req.vq_state.index];
		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tvq index: %u, avail_index: %u",
				req.vq_state.index, vq->last_avail_idx);
		resp.vq_state.split.avail_index = vq->last_avail_idx;
		resp.result = VDUSE_REQ_RESULT_OK;
		break;
	case VDUSE_SET_STATUS:
		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnew status: 0x%08x",
				req.s.status);
		old_status = dev->status;
		dev->status = req.s.status;
		resp.result = VDUSE_REQ_RESULT_OK;
		break;
	case VDUSE_UPDATE_IOTLB:
		VHOST_CONFIG_LOG(dev->ifname, INFO, "\tIOVA range: %" PRIx64 " - %" PRIx64,
				(uint64_t)req.iova.start, (uint64_t)req.iova.last);
		vhost_user_iotlb_cache_remove(dev, req.iova.start,
				req.iova.last - req.iova.start + 1);
		resp.result = VDUSE_REQ_RESULT_OK;
		break;
	default:
		resp.result = VDUSE_REQ_RESULT_FAILED;
		break;
	}

	resp.request_id = req.request_id;

	ret = write(dev->vduse_dev_fd, &resp, sizeof(resp));
	if (ret != sizeof(resp)) {
		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to write response %s",
				strerror(errno));
		return;
	}

	if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
		if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
			vduse_device_start(dev);
		else
			vduse_device_stop(dev);
	}

	VHOST_CONFIG_LOG(dev->ifname, INFO, "Request %s (%u) handled successfully",
			vduse_req_id_to_str(req.type), req.type);
}

int
vduse_device_create(const char *path, bool compliant_ol_flags)
{
	int control_fd, dev_fd, vid, ret;
	uint32_t i, max_queue_pairs, total_queues;
	struct virtio_net *dev;
	struct virtio_net_config vnet_config = {{ 0 }};
	uint64_t ver = VHOST_VDUSE_API_VERSION;
	uint64_t features;
	struct vduse_dev_config *dev_config = NULL;
	const char *name = path + strlen("/dev/vduse/");

	if (vduse.fdset == NULL) {
		vduse.fdset = fdset_init("vduse-evt");
		if (vduse.fdset == NULL) {
			VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE fdset");
			return -1;
		}
	}

	control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
	if (control_fd < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
				VDUSE_CTRL_PATH, strerror(errno));
		return -1;
	}

	if (ioctl(control_fd, VDUSE_SET_API_VERSION, &ver)) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to set API version: %" PRIu64 ": %s",
				ver, strerror(errno));
		ret = -1;
		goto out_ctrl_close;
	}

	dev_config = malloc(offsetof(struct vduse_dev_config, config) +
			sizeof(vnet_config));
	if (!dev_config) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
		ret = -1;
		goto out_ctrl_close;
	}

	ret = rte_vhost_driver_get_features(path, &features);
	if (ret < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
		goto out_free;
	}

	ret = rte_vhost_driver_get_queue_num(path, &max_queue_pairs);
	if (ret < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to get max queue pairs");
		goto out_free;
	}

	VHOST_CONFIG_LOG(path, INFO, "VDUSE max queue pairs: %u", max_queue_pairs);
	total_queues = max_queue_pairs * 2;

	if (max_queue_pairs == 1)
		features &= ~(RTE_BIT64(VIRTIO_NET_F_CTRL_VQ) | RTE_BIT64(VIRTIO_NET_F_MQ));
	else
		total_queues += 1; /* Includes ctrl queue */

	vnet_config.max_virtqueue_pairs = max_queue_pairs;
	memset(dev_config, 0, sizeof(struct vduse_dev_config));

	strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
	dev_config->device_id = VIRTIO_ID_NET;
	dev_config->vendor_id = 0;
	dev_config->features = features;
	dev_config->vq_num = total_queues;
	dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
	dev_config->config_size = sizeof(struct virtio_net_config);
	memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));

	ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
	if (ret < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
				strerror(errno));
		goto out_free;
	}

	dev_fd = open(path, O_RDWR);
	if (dev_fd < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to open device %s: %s",
				path, strerror(errno));
		ret = -1;
		goto out_dev_close;
	}

	ret = fcntl(dev_fd, F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to set chardev as non-blocking: %s",
				strerror(errno));
		goto out_dev_close;
	}

	vid = vhost_new_device(&vduse_backend_ops);
	if (vid < 0) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to create new Vhost device");
		ret = -1;
		goto out_dev_close;
	}

	dev = get_device(vid);
	if (!dev) {
		ret = -1;
		goto out_dev_close;
	}

	strncpy(dev->ifname, path, IF_NAME_SZ - 1);
	dev->vduse_ctrl_fd = control_fd;
	dev->vduse_dev_fd = dev_fd;
	vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);

	for (i = 0; i < total_queues; i++) {
		struct vduse_vq_config vq_cfg = { 0 };

		ret = alloc_vring_queue(dev, i);
		if (ret) {
			VHOST_CONFIG_LOG(name, ERR, "Failed to alloc vring %d metadata", i);
			goto out_dev_destroy;
		}

		vq_cfg.index = i;
		vq_cfg.max_size = 1024;

		ret = ioctl(dev->vduse_dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
		if (ret) {
			VHOST_CONFIG_LOG(name, ERR, "Failed to set-up VQ %d", i);
			goto out_dev_destroy;
		}
	}

	dev->cvq = dev->virtqueue[max_queue_pairs * 2];

	ret = fdset_add(vduse.fdset, dev->vduse_dev_fd, vduse_events_handler, NULL, dev);
	if (ret) {
		VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
				dev->vduse_dev_fd);
		goto out_dev_destroy;
	}

	free(dev_config);

	return 0;

out_dev_destroy:
	vhost_destroy_device(vid);
out_dev_close:
	if (dev_fd >= 0)
		close(dev_fd);
	ioctl(control_fd, VDUSE_DESTROY_DEV, name);
out_free:
	free(dev_config);
out_ctrl_close:
	close(control_fd);

	return ret;
}

int
vduse_device_destroy(const char *path)
{
	const char *name = path + strlen("/dev/vduse/");
	struct virtio_net *dev;
	int vid, ret;

	for (vid = 0; vid < RTE_MAX_VHOST_DEVICE; vid++) {
		dev = vhost_devices[vid];

		if (dev == NULL)
			continue;

		if (!strcmp(path, dev->ifname))
			break;
	}

	if (vid == RTE_MAX_VHOST_DEVICE)
		return -1;

	vduse_device_stop(dev);

	fdset_del(vduse.fdset, dev->vduse_dev_fd);

	if (dev->vduse_dev_fd >= 0) {
		close(dev->vduse_dev_fd);
		dev->vduse_dev_fd = -1;
	}

	if (dev->vduse_ctrl_fd >= 0) {
		ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
		if (ret)
			VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
					strerror(errno));
		close(dev->vduse_ctrl_fd);
		dev->vduse_ctrl_fd = -1;
	}

	vhost_destroy_device(vid);

	return 0;
}
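
/*
 * Usage sketch: vduse_device_create() and vduse_device_destroy() are not
 * called by applications directly; they are reached through the generic
 * rte_vhost driver API once the registered path lives under /dev/vduse/.
 * A minimal flow could look like the following, where "/dev/vduse/net0",
 * "features" and "ops" are illustrative application-side names:
 *
 *	rte_vhost_driver_register("/dev/vduse/net0", 0);
 *	rte_vhost_driver_set_features("/dev/vduse/net0", features);
 *	rte_vhost_driver_callback_register("/dev/vduse/net0", &ops);
 *	rte_vhost_driver_start("/dev/vduse/net0");
 *
 * Teardown is expected to go through rte_vhost_driver_unregister(), which
 * leads to vduse_device_destroy() above.
 */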