1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2017 Intel Corporation 3 */ 4 5 #include <linux/vhost.h> 6 #include <linux/virtio_net.h> 7 #include <stddef.h> 8 #include <stdint.h> 9 #include <stdlib.h> 10 #ifdef RTE_LIBRTE_VHOST_NUMA 11 #include <numa.h> 12 #include <numaif.h> 13 #endif 14 15 #include <rte_errno.h> 16 #include <rte_ethdev.h> 17 #include <rte_log.h> 18 #include <rte_string_fns.h> 19 #include <rte_memory.h> 20 #include <rte_malloc.h> 21 #include <rte_vhost.h> 22 #include <rte_rwlock.h> 23 24 #include "iotlb.h" 25 #include "vhost.h" 26 #include "vhost_user.h" 27 28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; 29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER; 30 31 /* Called with iotlb_lock read-locked */ 32 uint64_t 33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, 34 uint64_t iova, uint64_t *size, uint8_t perm) 35 { 36 uint64_t vva, tmp_size; 37 38 if (unlikely(!*size)) 39 return 0; 40 41 tmp_size = *size; 42 43 vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm); 44 if (tmp_size == *size) 45 return vva; 46 47 iova += tmp_size; 48 49 if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) { 50 /* 51 * iotlb_lock is read-locked for a full burst, 52 * but it only protects the iotlb cache. 53 * In case of IOTLB miss, we might block on the socket, 54 * which could cause a deadlock with QEMU if an IOTLB update 55 * is being handled. We can safely unlock here to avoid it. 56 */ 57 vhost_user_iotlb_rd_unlock(vq); 58 59 vhost_user_iotlb_pending_insert(vq, iova, perm); 60 if (vhost_user_iotlb_miss(dev, iova, perm)) { 61 VHOST_LOG_CONFIG(ERR, 62 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n", 63 iova); 64 vhost_user_iotlb_pending_remove(vq, iova, 1, perm); 65 } 66 67 vhost_user_iotlb_rd_lock(vq); 68 } 69 70 return 0; 71 } 72 73 #define VHOST_LOG_PAGE 4096 74 75 /* 76 * Atomically set a bit in memory. 77 */ 78 static __rte_always_inline void 79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr) 80 { 81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) 82 /* 83 * __sync_ built-ins are deprecated, but __atomic_ ones 84 * are sub-optimized in older GCC versions. 85 */ 86 __sync_fetch_and_or_1(addr, (1U << nr)); 87 #else 88 __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); 89 #endif 90 } 91 92 static __rte_always_inline void 93 vhost_log_page(uint8_t *log_base, uint64_t page) 94 { 95 vhost_set_bit(page % 8, &log_base[page / 8]); 96 } 97 98 void 99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) 100 { 101 uint64_t page; 102 103 if (unlikely(!dev->log_base || !len)) 104 return; 105 106 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) 107 return; 108 109 /* To make sure guest memory updates are committed before logging */ 110 rte_atomic_thread_fence(__ATOMIC_RELEASE); 111 112 page = addr / VHOST_LOG_PAGE; 113 while (page * VHOST_LOG_PAGE < addr + len) { 114 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); 115 page += 1; 116 } 117 } 118 119 void 120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq, 121 uint64_t iova, uint64_t len) 122 { 123 uint64_t hva, gpa, map_len; 124 map_len = len; 125 126 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW); 127 if (map_len != len) { 128 VHOST_LOG_DATA(ERR, 129 "Failed to write log for IOVA 0x%" PRIx64 ". 
No IOTLB entry found\n", 130 iova); 131 return; 132 } 133 134 gpa = hva_to_gpa(dev, hva, len); 135 if (gpa) 136 __vhost_log_write(dev, gpa, len); 137 } 138 139 void 140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) 141 { 142 unsigned long *log_base; 143 int i; 144 145 if (unlikely(!dev->log_base)) 146 return; 147 148 /* No cache, nothing to sync */ 149 if (unlikely(!vq->log_cache)) 150 return; 151 152 rte_atomic_thread_fence(__ATOMIC_RELEASE); 153 154 log_base = (unsigned long *)(uintptr_t)dev->log_base; 155 156 for (i = 0; i < vq->log_cache_nb_elem; i++) { 157 struct log_cache_entry *elem = vq->log_cache + i; 158 159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) 160 /* 161 * '__sync' builtins are deprecated, but '__atomic' ones 162 * are sub-optimized in older GCC versions. 163 */ 164 __sync_fetch_and_or(log_base + elem->offset, elem->val); 165 #else 166 __atomic_fetch_or(log_base + elem->offset, elem->val, 167 __ATOMIC_RELAXED); 168 #endif 169 } 170 171 rte_atomic_thread_fence(__ATOMIC_RELEASE); 172 173 vq->log_cache_nb_elem = 0; 174 } 175 176 static __rte_always_inline void 177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, 178 uint64_t page) 179 { 180 uint32_t bit_nr = page % (sizeof(unsigned long) << 3); 181 uint32_t offset = page / (sizeof(unsigned long) << 3); 182 int i; 183 184 if (unlikely(!vq->log_cache)) { 185 /* No logging cache allocated, write dirty log map directly */ 186 rte_atomic_thread_fence(__ATOMIC_RELEASE); 187 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); 188 189 return; 190 } 191 192 for (i = 0; i < vq->log_cache_nb_elem; i++) { 193 struct log_cache_entry *elem = vq->log_cache + i; 194 195 if (elem->offset == offset) { 196 elem->val |= (1UL << bit_nr); 197 return; 198 } 199 } 200 201 if (unlikely(i >= VHOST_LOG_CACHE_NR)) { 202 /* 203 * No more room for a new log cache entry, 204 * so write the dirty log map directly. 205 */ 206 rte_atomic_thread_fence(__ATOMIC_RELEASE); 207 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); 208 209 return; 210 } 211 212 vq->log_cache[i].offset = offset; 213 vq->log_cache[i].val = (1UL << bit_nr); 214 vq->log_cache_nb_elem++; 215 } 216 217 void 218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, 219 uint64_t addr, uint64_t len) 220 { 221 uint64_t page; 222 223 if (unlikely(!dev->log_base || !len)) 224 return; 225 226 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) 227 return; 228 229 page = addr / VHOST_LOG_PAGE; 230 while (page * VHOST_LOG_PAGE < addr + len) { 231 vhost_log_cache_page(dev, vq, page); 232 page += 1; 233 } 234 } 235 236 void 237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq, 238 uint64_t iova, uint64_t len) 239 { 240 uint64_t hva, gpa, map_len; 241 map_len = len; 242 243 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW); 244 if (map_len != len) { 245 VHOST_LOG_DATA(ERR, 246 "Failed to write log for IOVA 0x%" PRIx64 ". 
No IOTLB entry found\n", 247 iova); 248 return; 249 } 250 251 gpa = hva_to_gpa(dev, hva, len); 252 if (gpa) 253 __vhost_log_cache_write(dev, vq, gpa, len); 254 } 255 256 void * 257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq, 258 uint64_t desc_addr, uint64_t desc_len) 259 { 260 void *idesc; 261 uint64_t src, dst; 262 uint64_t len, remain = desc_len; 263 264 idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node); 265 if (unlikely(!idesc)) 266 return NULL; 267 268 dst = (uint64_t)(uintptr_t)idesc; 269 270 while (remain) { 271 len = remain; 272 src = vhost_iova_to_vva(dev, vq, desc_addr, &len, 273 VHOST_ACCESS_RO); 274 if (unlikely(!src || !len)) { 275 rte_free(idesc); 276 return NULL; 277 } 278 279 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); 280 281 remain -= len; 282 dst += len; 283 desc_addr += len; 284 } 285 286 return idesc; 287 } 288 289 void 290 cleanup_vq(struct vhost_virtqueue *vq, int destroy) 291 { 292 if ((vq->callfd >= 0) && (destroy != 0)) 293 close(vq->callfd); 294 if (vq->kickfd >= 0) 295 close(vq->kickfd); 296 } 297 298 void 299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq) 300 { 301 if (!(dev->protocol_features & 302 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) 303 return; 304 305 if (vq_is_packed(dev)) { 306 if (vq->inflight_packed) 307 vq->inflight_packed = NULL; 308 } else { 309 if (vq->inflight_split) 310 vq->inflight_split = NULL; 311 } 312 313 if (vq->resubmit_inflight) { 314 if (vq->resubmit_inflight->resubmit_list) { 315 rte_free(vq->resubmit_inflight->resubmit_list); 316 vq->resubmit_inflight->resubmit_list = NULL; 317 } 318 rte_free(vq->resubmit_inflight); 319 vq->resubmit_inflight = NULL; 320 } 321 } 322 323 /* 324 * Unmap any memory, close any file descriptors and 325 * free any memory owned by a device. 326 */ 327 void 328 cleanup_device(struct virtio_net *dev, int destroy) 329 { 330 uint32_t i; 331 332 vhost_backend_cleanup(dev); 333 334 for (i = 0; i < dev->nr_vring; i++) { 335 cleanup_vq(dev->virtqueue[i], destroy); 336 cleanup_vq_inflight(dev, dev->virtqueue[i]); 337 } 338 } 339 340 static void 341 vhost_free_async_mem(struct vhost_virtqueue *vq) 342 { 343 rte_free(vq->async_pkts_info); 344 345 rte_free(vq->async_buffers_packed); 346 vq->async_buffers_packed = NULL; 347 rte_free(vq->async_descs_split); 348 vq->async_descs_split = NULL; 349 350 rte_free(vq->it_pool); 351 rte_free(vq->vec_pool); 352 353 vq->async_pkts_info = NULL; 354 vq->it_pool = NULL; 355 vq->vec_pool = NULL; 356 } 357 358 void 359 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) 360 { 361 if (vq_is_packed(dev)) 362 rte_free(vq->shadow_used_packed); 363 else 364 rte_free(vq->shadow_used_split); 365 366 vhost_free_async_mem(vq); 367 rte_free(vq->batch_copy_elems); 368 rte_mempool_free(vq->iotlb_pool); 369 rte_free(vq->log_cache); 370 rte_free(vq); 371 } 372 373 /* 374 * Release virtqueues and device memory. 
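 * Called from vhost_destroy_device() once cleanup_device() has run.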
375 */ 376 static void 377 free_device(struct virtio_net *dev) 378 { 379 uint32_t i; 380 381 for (i = 0; i < dev->nr_vring; i++) 382 free_vq(dev, dev->virtqueue[i]); 383 384 rte_free(dev); 385 } 386 387 static __rte_always_inline int 388 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) 389 { 390 if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG)))) 391 return 0; 392 393 vq->log_guest_addr = translate_log_addr(dev, vq, 394 vq->ring_addrs.log_guest_addr); 395 if (vq->log_guest_addr == 0) 396 return -1; 397 398 return 0; 399 } 400 401 /* 402 * Converts vring log address to GPA 403 * If IOMMU is enabled, the log address is IOVA 404 * If IOMMU not enabled, the log address is already GPA 405 * 406 * Caller should have iotlb_lock read-locked 407 */ 408 uint64_t 409 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq, 410 uint64_t log_addr) 411 { 412 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { 413 const uint64_t exp_size = sizeof(uint64_t); 414 uint64_t hva, gpa; 415 uint64_t size = exp_size; 416 417 hva = vhost_iova_to_vva(dev, vq, log_addr, 418 &size, VHOST_ACCESS_RW); 419 420 if (size != exp_size) 421 return 0; 422 423 gpa = hva_to_gpa(dev, hva, exp_size); 424 if (!gpa) { 425 VHOST_LOG_CONFIG(ERR, 426 "VQ: Failed to find GPA for log_addr: 0x%" 427 PRIx64 " hva: 0x%" PRIx64 "\n", 428 log_addr, hva); 429 return 0; 430 } 431 return gpa; 432 433 } else 434 return log_addr; 435 } 436 437 /* Caller should have iotlb_lock read-locked */ 438 static int 439 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq) 440 { 441 uint64_t req_size, size; 442 443 req_size = sizeof(struct vring_desc) * vq->size; 444 size = req_size; 445 vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq, 446 vq->ring_addrs.desc_user_addr, 447 &size, VHOST_ACCESS_RW); 448 if (!vq->desc || size != req_size) 449 return -1; 450 451 req_size = sizeof(struct vring_avail); 452 req_size += sizeof(uint16_t) * vq->size; 453 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) 454 req_size += sizeof(uint16_t); 455 size = req_size; 456 vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq, 457 vq->ring_addrs.avail_user_addr, 458 &size, VHOST_ACCESS_RW); 459 if (!vq->avail || size != req_size) 460 return -1; 461 462 req_size = sizeof(struct vring_used); 463 req_size += sizeof(struct vring_used_elem) * vq->size; 464 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) 465 req_size += sizeof(uint16_t); 466 size = req_size; 467 vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq, 468 vq->ring_addrs.used_user_addr, 469 &size, VHOST_ACCESS_RW); 470 if (!vq->used || size != req_size) 471 return -1; 472 473 return 0; 474 } 475 476 /* Caller should have iotlb_lock read-locked */ 477 static int 478 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) 479 { 480 uint64_t req_size, size; 481 482 req_size = sizeof(struct vring_packed_desc) * vq->size; 483 size = req_size; 484 vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) 485 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, 486 &size, VHOST_ACCESS_RW); 487 if (!vq->desc_packed || size != req_size) 488 return -1; 489 490 req_size = sizeof(struct vring_packed_desc_event); 491 size = req_size; 492 vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t) 493 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr, 494 &size, VHOST_ACCESS_RW); 495 if (!vq->driver_event || size != req_size) 496 return -1; 497 498 req_size = 
sizeof(struct vring_packed_desc_event);
	size = req_size;
	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
				&size, VHOST_ACCESS_RW);
	if (!vq->device_event || size != req_size)
		return -1;

	return 0;
}

int
vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
{

	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
		return -1;

	if (vq_is_packed(dev)) {
		if (vring_translate_packed(dev, vq) < 0)
			return -1;
	} else {
		if (vring_translate_split(dev, vq) < 0)
			return -1;
	}

	if (log_translate(dev, vq) < 0)
		return -1;

	vq->access_ok = true;

	return 0;
}

void
vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_wr_lock(vq);

	vq->access_ok = false;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->log_guest_addr = 0;

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_wr_unlock(vq);
}

static void
init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
	struct vhost_virtqueue *vq;
	int numa_node = SOCKET_ID_ANY;

	if (vring_idx >= VHOST_MAX_VRING) {
		VHOST_LOG_CONFIG(ERR,
				"Failed to init vring, out of bounds (%d)\n",
				vring_idx);
		return;
	}

	vq = dev->virtqueue[vring_idx];
	if (!vq) {
		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
				vring_idx);
		return;
	}

	memset(vq, 0, sizeof(struct vhost_virtqueue));

	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
		VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
			dev->vid, rte_strerror(errno));
		numa_node = SOCKET_ID_ANY;
	}
#endif
	vq->numa_node = numa_node;

	vhost_user_iotlb_init(dev, vring_idx);
}

static void
reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
	struct vhost_virtqueue *vq;
	int callfd;

	if (vring_idx >= VHOST_MAX_VRING) {
		VHOST_LOG_CONFIG(ERR,
				"Failed to reset vring, out of bounds (%d)\n",
				vring_idx);
		return;
	}

	vq = dev->virtqueue[vring_idx];
	if (!vq) {
		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
				vring_idx);
		return;
	}

	callfd = vq->callfd;
	init_vring_queue(dev, vring_idx);
	vq->callfd = callfd;
}

int
alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
	struct vhost_virtqueue *vq;
	uint32_t i;

	/* Also allocate holes, if any, up to requested vring index.
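	 * For example, requesting vring index 3 on a fresh device also
	 * allocates vrings 0, 1 and 2.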
	 */
	for (i = 0; i <= vring_idx; i++) {
		if (dev->virtqueue[i])
			continue;

		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
		if (vq == NULL) {
			VHOST_LOG_CONFIG(ERR,
				"Failed to allocate memory for vring:%u.\n", i);
			return -1;
		}

		dev->virtqueue[i] = vq;
		init_vring_queue(dev, i);
		rte_spinlock_init(&vq->access_lock);
		vq->avail_wrap_counter = 1;
		vq->used_wrap_counter = 1;
		vq->signalled_used_valid = false;
	}

	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);

	return 0;
}

/*
 * Reset some variables in the device structure, while keeping a few
 * others untouched, such as vid, ifname and nr_vring: they should
 * remain the same unless the device is removed.
 */
void
reset_device(struct virtio_net *dev)
{
	uint32_t i;

	dev->features = 0;
	dev->protocol_features = 0;
	dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;

	for (i = 0; i < dev->nr_vring; i++)
		reset_vring_queue(dev, i);
}

/*
 * Invoked when there is a new vhost-user connection established (when
 * there is a new virtio device being attached).
 */
int
vhost_new_device(void)
{
	struct virtio_net *dev;
	int i;

	pthread_mutex_lock(&vhost_dev_lock);
	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
		if (vhost_devices[i] == NULL)
			break;
	}

	if (i == MAX_VHOST_DEVICE) {
		VHOST_LOG_CONFIG(ERR,
			"Failed to find a free slot for new device.\n");
		pthread_mutex_unlock(&vhost_dev_lock);
		return -1;
	}

	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
	if (dev == NULL) {
		VHOST_LOG_CONFIG(ERR,
			"Failed to allocate memory for new dev.\n");
		pthread_mutex_unlock(&vhost_dev_lock);
		return -1;
	}

	vhost_devices[i] = dev;
	pthread_mutex_unlock(&vhost_dev_lock);

	dev->vid = i;
	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
	dev->slave_req_fd = -1;
	dev->postcopy_ufd = -1;
	rte_spinlock_init(&dev->slave_req_lock);

	return i;
}

void
vhost_destroy_device_notify(struct virtio_net *dev)
{
	struct rte_vdpa_device *vdpa_dev;

	if (dev->flags & VIRTIO_DEV_RUNNING) {
		vdpa_dev = dev->vdpa_dev;
		if (vdpa_dev)
			vdpa_dev->ops->dev_close(dev->vid);
		dev->flags &= ~VIRTIO_DEV_RUNNING;
		dev->notify_ops->destroy_device(dev->vid);
	}
}

/*
 * Invoked when the vhost-user connection is broken (when the virtio
 * device is being detached).
 */
void
vhost_destroy_device(int vid)
{
	struct virtio_net *dev = get_device(vid);

	if (dev == NULL)
		return;

	vhost_destroy_device_notify(dev);

	cleanup_device(dev, 1);
	free_device(dev);

	vhost_devices[vid] = NULL;
}

void
vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
{
	struct virtio_net *dev = get_device(vid);

	if (dev == NULL)
		return;

	dev->vdpa_dev = vdpa_dev;
}

void
vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
{
	struct virtio_net *dev;
	unsigned int len;

	dev = get_device(vid);
	if (dev == NULL)
		return;

	len = if_len > sizeof(dev->ifname) ?
759 sizeof(dev->ifname) : if_len; 760 761 strncpy(dev->ifname, if_name, len); 762 dev->ifname[sizeof(dev->ifname) - 1] = '\0'; 763 } 764 765 void 766 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags) 767 { 768 struct virtio_net *dev = get_device(vid); 769 770 if (dev == NULL) 771 return; 772 773 if (enable) 774 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET; 775 else 776 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET; 777 if (!compliant_ol_flags) 778 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS; 779 else 780 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS; 781 } 782 783 void 784 vhost_enable_extbuf(int vid) 785 { 786 struct virtio_net *dev = get_device(vid); 787 788 if (dev == NULL) 789 return; 790 791 dev->extbuf = 1; 792 } 793 794 void 795 vhost_enable_linearbuf(int vid) 796 { 797 struct virtio_net *dev = get_device(vid); 798 799 if (dev == NULL) 800 return; 801 802 dev->linearbuf = 1; 803 } 804 805 int 806 rte_vhost_get_mtu(int vid, uint16_t *mtu) 807 { 808 struct virtio_net *dev = get_device(vid); 809 810 if (dev == NULL || mtu == NULL) 811 return -ENODEV; 812 813 if (!(dev->flags & VIRTIO_DEV_READY)) 814 return -EAGAIN; 815 816 if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU))) 817 return -ENOTSUP; 818 819 *mtu = dev->mtu; 820 821 return 0; 822 } 823 824 int 825 rte_vhost_get_numa_node(int vid) 826 { 827 #ifdef RTE_LIBRTE_VHOST_NUMA 828 struct virtio_net *dev = get_device(vid); 829 int numa_node; 830 int ret; 831 832 if (dev == NULL || numa_available() != 0) 833 return -1; 834 835 ret = get_mempolicy(&numa_node, NULL, 0, dev, 836 MPOL_F_NODE | MPOL_F_ADDR); 837 if (ret < 0) { 838 VHOST_LOG_CONFIG(ERR, 839 "(%d) failed to query numa node: %s\n", 840 vid, rte_strerror(errno)); 841 return -1; 842 } 843 844 return numa_node; 845 #else 846 RTE_SET_USED(vid); 847 return -1; 848 #endif 849 } 850 851 uint32_t 852 rte_vhost_get_queue_num(int vid) 853 { 854 struct virtio_net *dev = get_device(vid); 855 856 if (dev == NULL) 857 return 0; 858 859 return dev->nr_vring / 2; 860 } 861 862 uint16_t 863 rte_vhost_get_vring_num(int vid) 864 { 865 struct virtio_net *dev = get_device(vid); 866 867 if (dev == NULL) 868 return 0; 869 870 return dev->nr_vring; 871 } 872 873 int 874 rte_vhost_get_ifname(int vid, char *buf, size_t len) 875 { 876 struct virtio_net *dev = get_device(vid); 877 878 if (dev == NULL || buf == NULL) 879 return -1; 880 881 len = RTE_MIN(len, sizeof(dev->ifname)); 882 883 strncpy(buf, dev->ifname, len); 884 buf[len - 1] = '\0'; 885 886 return 0; 887 } 888 889 int 890 rte_vhost_get_negotiated_features(int vid, uint64_t *features) 891 { 892 struct virtio_net *dev; 893 894 dev = get_device(vid); 895 if (dev == NULL || features == NULL) 896 return -1; 897 898 *features = dev->features; 899 return 0; 900 } 901 902 int 903 rte_vhost_get_negotiated_protocol_features(int vid, 904 uint64_t *protocol_features) 905 { 906 struct virtio_net *dev; 907 908 dev = get_device(vid); 909 if (dev == NULL || protocol_features == NULL) 910 return -1; 911 912 *protocol_features = dev->protocol_features; 913 return 0; 914 } 915 916 int 917 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) 918 { 919 struct virtio_net *dev; 920 struct rte_vhost_memory *m; 921 size_t size; 922 923 dev = get_device(vid); 924 if (dev == NULL || mem == NULL) 925 return -1; 926 927 size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region); 928 m = malloc(sizeof(struct rte_vhost_memory) + size); 929 if (!m) 930 return -1; 931 932 m->nregions = dev->mem->nregions; 933 memcpy(m->regions, dev->mem->regions, 
size); 934 *mem = m; 935 936 return 0; 937 } 938 939 int 940 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, 941 struct rte_vhost_vring *vring) 942 { 943 struct virtio_net *dev; 944 struct vhost_virtqueue *vq; 945 946 dev = get_device(vid); 947 if (dev == NULL || vring == NULL) 948 return -1; 949 950 if (vring_idx >= VHOST_MAX_VRING) 951 return -1; 952 953 vq = dev->virtqueue[vring_idx]; 954 if (!vq) 955 return -1; 956 957 if (vq_is_packed(dev)) { 958 vring->desc_packed = vq->desc_packed; 959 vring->driver_event = vq->driver_event; 960 vring->device_event = vq->device_event; 961 } else { 962 vring->desc = vq->desc; 963 vring->avail = vq->avail; 964 vring->used = vq->used; 965 } 966 vring->log_guest_addr = vq->log_guest_addr; 967 968 vring->callfd = vq->callfd; 969 vring->kickfd = vq->kickfd; 970 vring->size = vq->size; 971 972 return 0; 973 } 974 975 int 976 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx, 977 struct rte_vhost_ring_inflight *vring) 978 { 979 struct virtio_net *dev; 980 struct vhost_virtqueue *vq; 981 982 dev = get_device(vid); 983 if (unlikely(!dev)) 984 return -1; 985 986 if (vring_idx >= VHOST_MAX_VRING) 987 return -1; 988 989 vq = dev->virtqueue[vring_idx]; 990 if (unlikely(!vq)) 991 return -1; 992 993 if (vq_is_packed(dev)) { 994 if (unlikely(!vq->inflight_packed)) 995 return -1; 996 997 vring->inflight_packed = vq->inflight_packed; 998 } else { 999 if (unlikely(!vq->inflight_split)) 1000 return -1; 1001 1002 vring->inflight_split = vq->inflight_split; 1003 } 1004 1005 vring->resubmit_inflight = vq->resubmit_inflight; 1006 1007 return 0; 1008 } 1009 1010 int 1011 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, 1012 uint16_t idx) 1013 { 1014 struct vhost_virtqueue *vq; 1015 struct virtio_net *dev; 1016 1017 dev = get_device(vid); 1018 if (unlikely(!dev)) 1019 return -1; 1020 1021 if (unlikely(!(dev->protocol_features & 1022 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1023 return 0; 1024 1025 if (unlikely(vq_is_packed(dev))) 1026 return -1; 1027 1028 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1029 return -1; 1030 1031 vq = dev->virtqueue[vring_idx]; 1032 if (unlikely(!vq)) 1033 return -1; 1034 1035 if (unlikely(!vq->inflight_split)) 1036 return -1; 1037 1038 if (unlikely(idx >= vq->size)) 1039 return -1; 1040 1041 vq->inflight_split->desc[idx].counter = vq->global_counter++; 1042 vq->inflight_split->desc[idx].inflight = 1; 1043 return 0; 1044 } 1045 1046 int 1047 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx, 1048 uint16_t head, uint16_t last, 1049 uint16_t *inflight_entry) 1050 { 1051 struct rte_vhost_inflight_info_packed *inflight_info; 1052 struct virtio_net *dev; 1053 struct vhost_virtqueue *vq; 1054 struct vring_packed_desc *desc; 1055 uint16_t old_free_head, free_head; 1056 1057 dev = get_device(vid); 1058 if (unlikely(!dev)) 1059 return -1; 1060 1061 if (unlikely(!(dev->protocol_features & 1062 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1063 return 0; 1064 1065 if (unlikely(!vq_is_packed(dev))) 1066 return -1; 1067 1068 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1069 return -1; 1070 1071 vq = dev->virtqueue[vring_idx]; 1072 if (unlikely(!vq)) 1073 return -1; 1074 1075 inflight_info = vq->inflight_packed; 1076 if (unlikely(!inflight_info)) 1077 return -1; 1078 1079 if (unlikely(head >= vq->size)) 1080 return -1; 1081 1082 desc = vq->desc_packed; 1083 old_free_head = inflight_info->old_free_head; 1084 if (unlikely(old_free_head >= vq->size)) 1085 return -1; 1086 1087 free_head = old_free_head; 1088 
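	/*
	 * The entry at old_free_head serves as the head of the chain being
	 * tracked: it is marked inflight, its 'num' field counts how many
	 * descriptors get saved below and its 'last' field records the tail
	 * slot. Each ring descriptor in [head, last] is then copied into the
	 * inflight free list, with free_head advancing along the 'next' links.
	 */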
1089 /* init header descriptor */ 1090 inflight_info->desc[old_free_head].num = 0; 1091 inflight_info->desc[old_free_head].counter = vq->global_counter++; 1092 inflight_info->desc[old_free_head].inflight = 1; 1093 1094 /* save desc entry in flight entry */ 1095 while (head != ((last + 1) % vq->size)) { 1096 inflight_info->desc[old_free_head].num++; 1097 inflight_info->desc[free_head].addr = desc[head].addr; 1098 inflight_info->desc[free_head].len = desc[head].len; 1099 inflight_info->desc[free_head].flags = desc[head].flags; 1100 inflight_info->desc[free_head].id = desc[head].id; 1101 1102 inflight_info->desc[old_free_head].last = free_head; 1103 free_head = inflight_info->desc[free_head].next; 1104 inflight_info->free_head = free_head; 1105 head = (head + 1) % vq->size; 1106 } 1107 1108 inflight_info->old_free_head = free_head; 1109 *inflight_entry = old_free_head; 1110 1111 return 0; 1112 } 1113 1114 int 1115 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, 1116 uint16_t last_used_idx, uint16_t idx) 1117 { 1118 struct virtio_net *dev; 1119 struct vhost_virtqueue *vq; 1120 1121 dev = get_device(vid); 1122 if (unlikely(!dev)) 1123 return -1; 1124 1125 if (unlikely(!(dev->protocol_features & 1126 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1127 return 0; 1128 1129 if (unlikely(vq_is_packed(dev))) 1130 return -1; 1131 1132 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1133 return -1; 1134 1135 vq = dev->virtqueue[vring_idx]; 1136 if (unlikely(!vq)) 1137 return -1; 1138 1139 if (unlikely(!vq->inflight_split)) 1140 return -1; 1141 1142 if (unlikely(idx >= vq->size)) 1143 return -1; 1144 1145 rte_atomic_thread_fence(__ATOMIC_SEQ_CST); 1146 1147 vq->inflight_split->desc[idx].inflight = 0; 1148 1149 rte_atomic_thread_fence(__ATOMIC_SEQ_CST); 1150 1151 vq->inflight_split->used_idx = last_used_idx; 1152 return 0; 1153 } 1154 1155 int 1156 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx, 1157 uint16_t head) 1158 { 1159 struct rte_vhost_inflight_info_packed *inflight_info; 1160 struct virtio_net *dev; 1161 struct vhost_virtqueue *vq; 1162 1163 dev = get_device(vid); 1164 if (unlikely(!dev)) 1165 return -1; 1166 1167 if (unlikely(!(dev->protocol_features & 1168 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1169 return 0; 1170 1171 if (unlikely(!vq_is_packed(dev))) 1172 return -1; 1173 1174 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1175 return -1; 1176 1177 vq = dev->virtqueue[vring_idx]; 1178 if (unlikely(!vq)) 1179 return -1; 1180 1181 inflight_info = vq->inflight_packed; 1182 if (unlikely(!inflight_info)) 1183 return -1; 1184 1185 if (unlikely(head >= vq->size)) 1186 return -1; 1187 1188 rte_atomic_thread_fence(__ATOMIC_SEQ_CST); 1189 1190 inflight_info->desc[head].inflight = 0; 1191 1192 rte_atomic_thread_fence(__ATOMIC_SEQ_CST); 1193 1194 inflight_info->old_free_head = inflight_info->free_head; 1195 inflight_info->old_used_idx = inflight_info->used_idx; 1196 inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter; 1197 1198 return 0; 1199 } 1200 1201 int 1202 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, 1203 uint16_t idx) 1204 { 1205 struct virtio_net *dev; 1206 struct vhost_virtqueue *vq; 1207 1208 dev = get_device(vid); 1209 if (unlikely(!dev)) 1210 return -1; 1211 1212 if (unlikely(!(dev->protocol_features & 1213 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1214 return 0; 1215 1216 if (unlikely(vq_is_packed(dev))) 1217 return -1; 1218 1219 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1220 return -1; 1221 1222 vq = 
dev->virtqueue[vring_idx]; 1223 if (unlikely(!vq)) 1224 return -1; 1225 1226 if (unlikely(!vq->inflight_split)) 1227 return -1; 1228 1229 if (unlikely(idx >= vq->size)) 1230 return -1; 1231 1232 vq->inflight_split->last_inflight_io = idx; 1233 return 0; 1234 } 1235 1236 int 1237 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx, 1238 uint16_t head) 1239 { 1240 struct rte_vhost_inflight_info_packed *inflight_info; 1241 struct virtio_net *dev; 1242 struct vhost_virtqueue *vq; 1243 uint16_t last; 1244 1245 dev = get_device(vid); 1246 if (unlikely(!dev)) 1247 return -1; 1248 1249 if (unlikely(!(dev->protocol_features & 1250 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) 1251 return 0; 1252 1253 if (unlikely(!vq_is_packed(dev))) 1254 return -1; 1255 1256 if (unlikely(vring_idx >= VHOST_MAX_VRING)) 1257 return -1; 1258 1259 vq = dev->virtqueue[vring_idx]; 1260 if (unlikely(!vq)) 1261 return -1; 1262 1263 inflight_info = vq->inflight_packed; 1264 if (unlikely(!inflight_info)) 1265 return -1; 1266 1267 if (unlikely(head >= vq->size)) 1268 return -1; 1269 1270 last = inflight_info->desc[head].last; 1271 if (unlikely(last >= vq->size)) 1272 return -1; 1273 1274 inflight_info->desc[last].next = inflight_info->free_head; 1275 inflight_info->free_head = head; 1276 inflight_info->used_idx += inflight_info->desc[head].num; 1277 if (inflight_info->used_idx >= inflight_info->desc_num) { 1278 inflight_info->used_idx -= inflight_info->desc_num; 1279 inflight_info->used_wrap_counter = 1280 !inflight_info->used_wrap_counter; 1281 } 1282 1283 return 0; 1284 } 1285 1286 int 1287 rte_vhost_vring_call(int vid, uint16_t vring_idx) 1288 { 1289 struct virtio_net *dev; 1290 struct vhost_virtqueue *vq; 1291 1292 dev = get_device(vid); 1293 if (!dev) 1294 return -1; 1295 1296 if (vring_idx >= VHOST_MAX_VRING) 1297 return -1; 1298 1299 vq = dev->virtqueue[vring_idx]; 1300 if (!vq) 1301 return -1; 1302 1303 if (vq_is_packed(dev)) 1304 vhost_vring_call_packed(dev, vq); 1305 else 1306 vhost_vring_call_split(dev, vq); 1307 1308 return 0; 1309 } 1310 1311 uint16_t 1312 rte_vhost_avail_entries(int vid, uint16_t queue_id) 1313 { 1314 struct virtio_net *dev; 1315 struct vhost_virtqueue *vq; 1316 uint16_t ret = 0; 1317 1318 dev = get_device(vid); 1319 if (!dev) 1320 return 0; 1321 1322 if (queue_id >= VHOST_MAX_VRING) 1323 return 0; 1324 1325 vq = dev->virtqueue[queue_id]; 1326 if (!vq) 1327 return 0; 1328 1329 rte_spinlock_lock(&vq->access_lock); 1330 1331 if (unlikely(!vq->enabled || vq->avail == NULL)) 1332 goto out; 1333 1334 ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; 1335 1336 out: 1337 rte_spinlock_unlock(&vq->access_lock); 1338 return ret; 1339 } 1340 1341 static inline int 1342 vhost_enable_notify_split(struct virtio_net *dev, 1343 struct vhost_virtqueue *vq, int enable) 1344 { 1345 if (vq->used == NULL) 1346 return -1; 1347 1348 if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) { 1349 if (enable) 1350 vq->used->flags &= ~VRING_USED_F_NO_NOTIFY; 1351 else 1352 vq->used->flags |= VRING_USED_F_NO_NOTIFY; 1353 } else { 1354 if (enable) 1355 vhost_avail_event(vq) = vq->last_avail_idx; 1356 } 1357 return 0; 1358 } 1359 1360 static inline int 1361 vhost_enable_notify_packed(struct virtio_net *dev, 1362 struct vhost_virtqueue *vq, int enable) 1363 { 1364 uint16_t flags; 1365 1366 if (vq->device_event == NULL) 1367 return -1; 1368 1369 if (!enable) { 1370 vq->device_event->flags = VRING_EVENT_F_DISABLE; 1371 return 0; 1372 } 1373 1374 flags = VRING_EVENT_F_ENABLE; 1375 if 
(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { 1376 flags = VRING_EVENT_F_DESC; 1377 vq->device_event->off_wrap = vq->last_avail_idx | 1378 vq->avail_wrap_counter << 15; 1379 } 1380 1381 rte_atomic_thread_fence(__ATOMIC_RELEASE); 1382 1383 vq->device_event->flags = flags; 1384 return 0; 1385 } 1386 1387 int 1388 vhost_enable_guest_notification(struct virtio_net *dev, 1389 struct vhost_virtqueue *vq, int enable) 1390 { 1391 /* 1392 * If the virtqueue is not ready yet, it will be applied 1393 * when it will become ready. 1394 */ 1395 if (!vq->ready) 1396 return 0; 1397 1398 if (vq_is_packed(dev)) 1399 return vhost_enable_notify_packed(dev, vq, enable); 1400 else 1401 return vhost_enable_notify_split(dev, vq, enable); 1402 } 1403 1404 int 1405 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) 1406 { 1407 struct virtio_net *dev = get_device(vid); 1408 struct vhost_virtqueue *vq; 1409 int ret; 1410 1411 if (!dev) 1412 return -1; 1413 1414 if (queue_id >= VHOST_MAX_VRING) 1415 return -1; 1416 1417 vq = dev->virtqueue[queue_id]; 1418 if (!vq) 1419 return -1; 1420 1421 rte_spinlock_lock(&vq->access_lock); 1422 1423 vq->notif_enable = enable; 1424 ret = vhost_enable_guest_notification(dev, vq, enable); 1425 1426 rte_spinlock_unlock(&vq->access_lock); 1427 1428 return ret; 1429 } 1430 1431 void 1432 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len) 1433 { 1434 struct virtio_net *dev = get_device(vid); 1435 1436 if (dev == NULL) 1437 return; 1438 1439 vhost_log_write(dev, addr, len); 1440 } 1441 1442 void 1443 rte_vhost_log_used_vring(int vid, uint16_t vring_idx, 1444 uint64_t offset, uint64_t len) 1445 { 1446 struct virtio_net *dev; 1447 struct vhost_virtqueue *vq; 1448 1449 dev = get_device(vid); 1450 if (dev == NULL) 1451 return; 1452 1453 if (vring_idx >= VHOST_MAX_VRING) 1454 return; 1455 vq = dev->virtqueue[vring_idx]; 1456 if (!vq) 1457 return; 1458 1459 vhost_log_used_vring(dev, vq, offset, len); 1460 } 1461 1462 uint32_t 1463 rte_vhost_rx_queue_count(int vid, uint16_t qid) 1464 { 1465 struct virtio_net *dev; 1466 struct vhost_virtqueue *vq; 1467 uint32_t ret = 0; 1468 1469 dev = get_device(vid); 1470 if (dev == NULL) 1471 return 0; 1472 1473 if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) { 1474 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", 1475 dev->vid, __func__, qid); 1476 return 0; 1477 } 1478 1479 vq = dev->virtqueue[qid]; 1480 if (vq == NULL) 1481 return 0; 1482 1483 rte_spinlock_lock(&vq->access_lock); 1484 1485 if (unlikely(!vq->enabled || vq->avail == NULL)) 1486 goto out; 1487 1488 ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; 1489 1490 out: 1491 rte_spinlock_unlock(&vq->access_lock); 1492 return ret; 1493 } 1494 1495 struct rte_vdpa_device * 1496 rte_vhost_get_vdpa_device(int vid) 1497 { 1498 struct virtio_net *dev = get_device(vid); 1499 1500 if (dev == NULL) 1501 return NULL; 1502 1503 return dev->vdpa_dev; 1504 } 1505 1506 int 1507 rte_vhost_get_log_base(int vid, uint64_t *log_base, 1508 uint64_t *log_size) 1509 { 1510 struct virtio_net *dev = get_device(vid); 1511 1512 if (dev == NULL || log_base == NULL || log_size == NULL) 1513 return -1; 1514 1515 *log_base = dev->log_base; 1516 *log_size = dev->log_size; 1517 1518 return 0; 1519 } 1520 1521 int 1522 rte_vhost_get_vring_base(int vid, uint16_t queue_id, 1523 uint16_t *last_avail_idx, uint16_t *last_used_idx) 1524 { 1525 struct vhost_virtqueue *vq; 1526 struct virtio_net *dev = get_device(vid); 1527 1528 if (dev == NULL || last_avail_idx == 
NULL || last_used_idx == NULL) 1529 return -1; 1530 1531 if (queue_id >= VHOST_MAX_VRING) 1532 return -1; 1533 1534 vq = dev->virtqueue[queue_id]; 1535 if (!vq) 1536 return -1; 1537 1538 if (vq_is_packed(dev)) { 1539 *last_avail_idx = (vq->avail_wrap_counter << 15) | 1540 vq->last_avail_idx; 1541 *last_used_idx = (vq->used_wrap_counter << 15) | 1542 vq->last_used_idx; 1543 } else { 1544 *last_avail_idx = vq->last_avail_idx; 1545 *last_used_idx = vq->last_used_idx; 1546 } 1547 1548 return 0; 1549 } 1550 1551 int 1552 rte_vhost_set_vring_base(int vid, uint16_t queue_id, 1553 uint16_t last_avail_idx, uint16_t last_used_idx) 1554 { 1555 struct vhost_virtqueue *vq; 1556 struct virtio_net *dev = get_device(vid); 1557 1558 if (!dev) 1559 return -1; 1560 1561 if (queue_id >= VHOST_MAX_VRING) 1562 return -1; 1563 1564 vq = dev->virtqueue[queue_id]; 1565 if (!vq) 1566 return -1; 1567 1568 if (vq_is_packed(dev)) { 1569 vq->last_avail_idx = last_avail_idx & 0x7fff; 1570 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15)); 1571 vq->last_used_idx = last_used_idx & 0x7fff; 1572 vq->used_wrap_counter = !!(last_used_idx & (1 << 15)); 1573 } else { 1574 vq->last_avail_idx = last_avail_idx; 1575 vq->last_used_idx = last_used_idx; 1576 } 1577 1578 return 0; 1579 } 1580 1581 int 1582 rte_vhost_get_vring_base_from_inflight(int vid, 1583 uint16_t queue_id, 1584 uint16_t *last_avail_idx, 1585 uint16_t *last_used_idx) 1586 { 1587 struct rte_vhost_inflight_info_packed *inflight_info; 1588 struct vhost_virtqueue *vq; 1589 struct virtio_net *dev = get_device(vid); 1590 1591 if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL) 1592 return -1; 1593 1594 if (queue_id >= VHOST_MAX_VRING) 1595 return -1; 1596 1597 vq = dev->virtqueue[queue_id]; 1598 if (!vq) 1599 return -1; 1600 1601 if (!vq_is_packed(dev)) 1602 return -1; 1603 1604 inflight_info = vq->inflight_packed; 1605 if (!inflight_info) 1606 return -1; 1607 1608 *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) | 1609 inflight_info->old_used_idx; 1610 *last_used_idx = *last_avail_idx; 1611 1612 return 0; 1613 } 1614 1615 int 1616 rte_vhost_extern_callback_register(int vid, 1617 struct rte_vhost_user_extern_ops const * const ops, void *ctx) 1618 { 1619 struct virtio_net *dev = get_device(vid); 1620 1621 if (dev == NULL || ops == NULL) 1622 return -1; 1623 1624 dev->extern_ops = *ops; 1625 dev->extern_data = ctx; 1626 return 0; 1627 } 1628 1629 static __rte_always_inline int 1630 async_channel_register(int vid, uint16_t queue_id, 1631 struct rte_vhost_async_channel_ops *ops) 1632 { 1633 struct virtio_net *dev = get_device(vid); 1634 struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; 1635 1636 if (unlikely(vq->async_registered)) { 1637 VHOST_LOG_CONFIG(ERR, 1638 "async register failed: channel already registered " 1639 "(vid %d, qid: %d)\n", vid, queue_id); 1640 return -1; 1641 } 1642 1643 vq->async_pkts_info = rte_malloc_socket(NULL, 1644 vq->size * sizeof(struct async_inflight_info), 1645 RTE_CACHE_LINE_SIZE, vq->numa_node); 1646 if (!vq->async_pkts_info) { 1647 vhost_free_async_mem(vq); 1648 VHOST_LOG_CONFIG(ERR, 1649 "async register failed: cannot allocate memory for async_pkts_info " 1650 "(vid %d, qid: %d)\n", vid, queue_id); 1651 return -1; 1652 } 1653 1654 vq->it_pool = rte_malloc_socket(NULL, 1655 VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter), 1656 RTE_CACHE_LINE_SIZE, vq->numa_node); 1657 if (!vq->it_pool) { 1658 vhost_free_async_mem(vq); 1659 VHOST_LOG_CONFIG(ERR, 1660 "async register failed: cannot allocate 
memory for it_pool " 1661 "(vid %d, qid: %d)\n", vid, queue_id); 1662 return -1; 1663 } 1664 1665 vq->vec_pool = rte_malloc_socket(NULL, 1666 VHOST_MAX_ASYNC_VEC * sizeof(struct iovec), 1667 RTE_CACHE_LINE_SIZE, vq->numa_node); 1668 if (!vq->vec_pool) { 1669 vhost_free_async_mem(vq); 1670 VHOST_LOG_CONFIG(ERR, 1671 "async register failed: cannot allocate memory for vec_pool " 1672 "(vid %d, qid: %d)\n", vid, queue_id); 1673 return -1; 1674 } 1675 1676 if (vq_is_packed(dev)) { 1677 vq->async_buffers_packed = rte_malloc_socket(NULL, 1678 vq->size * sizeof(struct vring_used_elem_packed), 1679 RTE_CACHE_LINE_SIZE, vq->numa_node); 1680 if (!vq->async_buffers_packed) { 1681 vhost_free_async_mem(vq); 1682 VHOST_LOG_CONFIG(ERR, 1683 "async register failed: cannot allocate memory for async buffers " 1684 "(vid %d, qid: %d)\n", vid, queue_id); 1685 return -1; 1686 } 1687 } else { 1688 vq->async_descs_split = rte_malloc_socket(NULL, 1689 vq->size * sizeof(struct vring_used_elem), 1690 RTE_CACHE_LINE_SIZE, vq->numa_node); 1691 if (!vq->async_descs_split) { 1692 vhost_free_async_mem(vq); 1693 VHOST_LOG_CONFIG(ERR, 1694 "async register failed: cannot allocate memory for async descs " 1695 "(vid %d, qid: %d)\n", vid, queue_id); 1696 return -1; 1697 } 1698 } 1699 1700 vq->async_ops.check_completed_copies = ops->check_completed_copies; 1701 vq->async_ops.transfer_data = ops->transfer_data; 1702 1703 vq->async_registered = true; 1704 1705 return 0; 1706 } 1707 1708 int 1709 rte_vhost_async_channel_register(int vid, uint16_t queue_id, 1710 struct rte_vhost_async_config config, 1711 struct rte_vhost_async_channel_ops *ops) 1712 { 1713 struct vhost_virtqueue *vq; 1714 struct virtio_net *dev = get_device(vid); 1715 int ret; 1716 1717 if (dev == NULL || ops == NULL) 1718 return -1; 1719 1720 if (queue_id >= VHOST_MAX_VRING) 1721 return -1; 1722 1723 vq = dev->virtqueue[queue_id]; 1724 1725 if (unlikely(vq == NULL || !dev->async_copy)) 1726 return -1; 1727 1728 if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) { 1729 VHOST_LOG_CONFIG(ERR, 1730 "async copy is not supported on non-inorder mode " 1731 "(vid %d, qid: %d)\n", vid, queue_id); 1732 return -1; 1733 } 1734 1735 if (unlikely(ops->check_completed_copies == NULL || 1736 ops->transfer_data == NULL)) 1737 return -1; 1738 1739 rte_spinlock_lock(&vq->access_lock); 1740 ret = async_channel_register(vid, queue_id, ops); 1741 rte_spinlock_unlock(&vq->access_lock); 1742 1743 return ret; 1744 } 1745 1746 int 1747 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id, 1748 struct rte_vhost_async_config config, 1749 struct rte_vhost_async_channel_ops *ops) 1750 { 1751 struct vhost_virtqueue *vq; 1752 struct virtio_net *dev = get_device(vid); 1753 1754 if (dev == NULL || ops == NULL) 1755 return -1; 1756 1757 if (queue_id >= VHOST_MAX_VRING) 1758 return -1; 1759 1760 vq = dev->virtqueue[queue_id]; 1761 1762 if (unlikely(vq == NULL || !dev->async_copy)) 1763 return -1; 1764 1765 if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) { 1766 VHOST_LOG_CONFIG(ERR, 1767 "async copy is not supported on non-inorder mode " 1768 "(vid %d, qid: %d)\n", vid, queue_id); 1769 return -1; 1770 } 1771 1772 if (unlikely(ops->check_completed_copies == NULL || 1773 ops->transfer_data == NULL)) 1774 return -1; 1775 1776 return async_channel_register(vid, queue_id, ops); 1777 } 1778 1779 int 1780 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) 1781 { 1782 struct vhost_virtqueue *vq; 1783 struct virtio_net *dev = get_device(vid); 1784 int ret 
= -1; 1785 1786 if (dev == NULL) 1787 return ret; 1788 1789 if (queue_id >= VHOST_MAX_VRING) 1790 return ret; 1791 1792 vq = dev->virtqueue[queue_id]; 1793 1794 if (vq == NULL) 1795 return ret; 1796 1797 ret = 0; 1798 1799 if (!vq->async_registered) 1800 return ret; 1801 1802 if (!rte_spinlock_trylock(&vq->access_lock)) { 1803 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. " 1804 "virt queue busy.\n"); 1805 return -1; 1806 } 1807 1808 if (vq->async_pkts_inflight_n) { 1809 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. " 1810 "async inflight packets must be completed before unregistration.\n"); 1811 ret = -1; 1812 goto out; 1813 } 1814 1815 vhost_free_async_mem(vq); 1816 1817 vq->async_ops.transfer_data = NULL; 1818 vq->async_ops.check_completed_copies = NULL; 1819 vq->async_registered = false; 1820 1821 out: 1822 rte_spinlock_unlock(&vq->access_lock); 1823 1824 return ret; 1825 } 1826 1827 int 1828 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id) 1829 { 1830 struct vhost_virtqueue *vq; 1831 struct virtio_net *dev = get_device(vid); 1832 1833 if (dev == NULL) 1834 return -1; 1835 1836 if (queue_id >= VHOST_MAX_VRING) 1837 return -1; 1838 1839 vq = dev->virtqueue[queue_id]; 1840 1841 if (vq == NULL) 1842 return -1; 1843 1844 if (!vq->async_registered) 1845 return 0; 1846 1847 if (vq->async_pkts_inflight_n) { 1848 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. " 1849 "async inflight packets must be completed before unregistration.\n"); 1850 return -1; 1851 } 1852 1853 vhost_free_async_mem(vq); 1854 1855 vq->async_ops.transfer_data = NULL; 1856 vq->async_ops.check_completed_copies = NULL; 1857 vq->async_registered = false; 1858 1859 return 0; 1860 } 1861 1862 int 1863 rte_vhost_async_get_inflight(int vid, uint16_t queue_id) 1864 { 1865 struct vhost_virtqueue *vq; 1866 struct virtio_net *dev = get_device(vid); 1867 int ret = -1; 1868 1869 if (dev == NULL) 1870 return ret; 1871 1872 if (queue_id >= VHOST_MAX_VRING) 1873 return ret; 1874 1875 vq = dev->virtqueue[queue_id]; 1876 1877 if (vq == NULL) 1878 return ret; 1879 1880 if (!vq->async_registered) 1881 return ret; 1882 1883 if (!rte_spinlock_trylock(&vq->access_lock)) { 1884 VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. " 1885 "virt queue busy.\n"); 1886 return ret; 1887 } 1888 1889 ret = vq->async_pkts_inflight_n; 1890 rte_spinlock_unlock(&vq->access_lock); 1891 1892 return ret; 1893 } 1894 1895 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO); 1896 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING); 1897
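
/*
 * Usage sketch (illustration only, not compiled into the library): how an
 * application might register an async copy channel once a vring is ready on
 * a device created with async copy enabled. The callbacks 'my_transfer_data'
 * and 'my_check_completed_copies' are hypothetical placeholders for a
 * DMA-backed implementation; both callbacks must be set and
 * RTE_VHOST_ASYNC_INORDER must be requested, otherwise
 * rte_vhost_async_channel_register() above rejects the registration.
 *
 *	static int
 *	app_setup_async(int vid, uint16_t queue_id)
 *	{
 *		struct rte_vhost_async_config cfg = {
 *			.features = RTE_VHOST_ASYNC_INORDER,
 *		};
 *		struct rte_vhost_async_channel_ops ops = {
 *			.transfer_data = my_transfer_data,
 *			.check_completed_copies = my_check_completed_copies,
 *		};
 *
 *		return rte_vhost_async_channel_register(vid, queue_id, cfg, &ops);
 *	}
 */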