1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 /* Security model 6 * -------------- 7 * The vhost-user protocol connection is an external interface, so it must be 8 * robust against invalid inputs. 9 * 10 * This is important because the vhost-user master is only one step removed 11 * from the guest. Malicious guests that have escaped will then launch further 12 * attacks from the vhost-user master. 13 * 14 * Even in deployments where guests are trusted, a bug in the vhost-user master 15 * can still cause invalid messages to be sent. Such messages must not 16 * compromise the stability of the DPDK application by causing crashes, memory 17 * corruption, or other problematic behavior. 18 * 19 * Do not assume received VhostUserMsg fields contain sensible values! 20 */ 21 22 #include <stdint.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <unistd.h> 27 #include <fcntl.h> 28 #include <sys/ioctl.h> 29 #include <sys/mman.h> 30 #include <sys/types.h> 31 #include <sys/stat.h> 32 #include <sys/syscall.h> 33 #include <assert.h> 34 #ifdef RTE_LIBRTE_VHOST_NUMA 35 #include <numaif.h> 36 #endif 37 #ifdef RTE_LIBRTE_VHOST_POSTCOPY 38 #include <linux/userfaultfd.h> 39 #endif 40 #ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */ 41 #include <linux/memfd.h> 42 #define MEMFD_SUPPORTED 43 #endif 44 45 #include <rte_common.h> 46 #include <rte_malloc.h> 47 #include <rte_log.h> 48 49 #include "iotlb.h" 50 #include "vhost.h" 51 #include "vhost_user.h" 52 53 #define VIRTIO_MIN_MTU 68 54 #define VIRTIO_MAX_MTU 65535 55 56 #define INFLIGHT_ALIGNMENT 64 57 #define INFLIGHT_VERSION 0x1 58 59 static const char *vhost_message_str[VHOST_USER_MAX] = { 60 [VHOST_USER_NONE] = "VHOST_USER_NONE", 61 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", 62 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", 63 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", 64 [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", 65 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", 66 [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", 67 [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", 68 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", 69 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", 70 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", 71 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", 72 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", 73 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", 74 [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", 75 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", 76 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", 77 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", 78 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", 79 [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", 80 [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", 81 [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD", 82 [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", 83 [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS", 84 [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS", 85 [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", 86 [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", 87 [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", 88 [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", 89 [VHOST_USER_SET_INFLIGHT_FD] = 
"VHOST_USER_SET_INFLIGHT_FD", 90 [VHOST_USER_SET_STATUS] = "VHOST_USER_SET_STATUS", 91 [VHOST_USER_GET_STATUS] = "VHOST_USER_GET_STATUS", 92 }; 93 94 static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); 95 static int read_vhost_message(int sockfd, struct VhostUserMsg *msg); 96 97 static void 98 close_msg_fds(struct VhostUserMsg *msg) 99 { 100 int i; 101 102 for (i = 0; i < msg->fd_num; i++) { 103 int fd = msg->fds[i]; 104 105 if (fd == -1) 106 continue; 107 108 msg->fds[i] = -1; 109 close(fd); 110 } 111 } 112 113 /* 114 * Ensure the expected number of FDs is received, 115 * close all FDs and return an error if this is not the case. 116 */ 117 static int 118 validate_msg_fds(struct VhostUserMsg *msg, int expected_fds) 119 { 120 if (msg->fd_num == expected_fds) 121 return 0; 122 123 VHOST_LOG_CONFIG(ERR, 124 " Expect %d FDs for request %s, received %d\n", 125 expected_fds, 126 vhost_message_str[msg->request.master], 127 msg->fd_num); 128 129 close_msg_fds(msg); 130 131 return -1; 132 } 133 134 static uint64_t 135 get_blk_size(int fd) 136 { 137 struct stat stat; 138 int ret; 139 140 ret = fstat(fd, &stat); 141 return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; 142 } 143 144 static void 145 free_mem_region(struct virtio_net *dev) 146 { 147 uint32_t i; 148 struct rte_vhost_mem_region *reg; 149 150 if (!dev || !dev->mem) 151 return; 152 153 for (i = 0; i < dev->mem->nregions; i++) { 154 reg = &dev->mem->regions[i]; 155 if (reg->host_user_addr) { 156 munmap(reg->mmap_addr, reg->mmap_size); 157 close(reg->fd); 158 } 159 } 160 } 161 162 void 163 vhost_backend_cleanup(struct virtio_net *dev) 164 { 165 if (dev->mem) { 166 free_mem_region(dev); 167 rte_free(dev->mem); 168 dev->mem = NULL; 169 } 170 171 rte_free(dev->guest_pages); 172 dev->guest_pages = NULL; 173 174 if (dev->log_addr) { 175 munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); 176 dev->log_addr = 0; 177 } 178 179 if (dev->inflight_info) { 180 if (dev->inflight_info->addr) { 181 munmap(dev->inflight_info->addr, 182 dev->inflight_info->size); 183 dev->inflight_info->addr = NULL; 184 } 185 186 if (dev->inflight_info->fd >= 0) { 187 close(dev->inflight_info->fd); 188 dev->inflight_info->fd = -1; 189 } 190 191 free(dev->inflight_info); 192 dev->inflight_info = NULL; 193 } 194 195 if (dev->slave_req_fd >= 0) { 196 close(dev->slave_req_fd); 197 dev->slave_req_fd = -1; 198 } 199 200 if (dev->postcopy_ufd >= 0) { 201 close(dev->postcopy_ufd); 202 dev->postcopy_ufd = -1; 203 } 204 205 dev->postcopy_listening = 0; 206 } 207 208 static void 209 vhost_user_notify_queue_state(struct virtio_net *dev, uint16_t index, 210 int enable) 211 { 212 struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev; 213 struct vhost_virtqueue *vq = dev->virtqueue[index]; 214 215 /* Configure guest notifications on enable */ 216 if (enable && vq->notif_enable != VIRTIO_UNINITIALIZED_NOTIF) 217 vhost_enable_guest_notification(dev, vq, vq->notif_enable); 218 219 if (vdpa_dev && vdpa_dev->ops->set_vring_state) 220 vdpa_dev->ops->set_vring_state(dev->vid, index, enable); 221 222 if (dev->notify_ops->vring_state_changed) 223 dev->notify_ops->vring_state_changed(dev->vid, 224 index, enable); 225 } 226 227 /* 228 * This function just returns success at the moment unless 229 * the device hasn't been initialised. 
230 */ 231 static int 232 vhost_user_set_owner(struct virtio_net **pdev __rte_unused, 233 struct VhostUserMsg *msg, 234 int main_fd __rte_unused) 235 { 236 if (validate_msg_fds(msg, 0) != 0) 237 return RTE_VHOST_MSG_RESULT_ERR; 238 239 return RTE_VHOST_MSG_RESULT_OK; 240 } 241 242 static int 243 vhost_user_reset_owner(struct virtio_net **pdev, 244 struct VhostUserMsg *msg, 245 int main_fd __rte_unused) 246 { 247 struct virtio_net *dev = *pdev; 248 249 if (validate_msg_fds(msg, 0) != 0) 250 return RTE_VHOST_MSG_RESULT_ERR; 251 252 vhost_destroy_device_notify(dev); 253 254 cleanup_device(dev, 0); 255 reset_device(dev); 256 return RTE_VHOST_MSG_RESULT_OK; 257 } 258 259 /* 260 * The features that we support are requested. 261 */ 262 static int 263 vhost_user_get_features(struct virtio_net **pdev, struct VhostUserMsg *msg, 264 int main_fd __rte_unused) 265 { 266 struct virtio_net *dev = *pdev; 267 uint64_t features = 0; 268 269 if (validate_msg_fds(msg, 0) != 0) 270 return RTE_VHOST_MSG_RESULT_ERR; 271 272 rte_vhost_driver_get_features(dev->ifname, &features); 273 274 msg->payload.u64 = features; 275 msg->size = sizeof(msg->payload.u64); 276 msg->fd_num = 0; 277 278 return RTE_VHOST_MSG_RESULT_REPLY; 279 } 280 281 /* 282 * The queue number that we support are requested. 283 */ 284 static int 285 vhost_user_get_queue_num(struct virtio_net **pdev, struct VhostUserMsg *msg, 286 int main_fd __rte_unused) 287 { 288 struct virtio_net *dev = *pdev; 289 uint32_t queue_num = 0; 290 291 if (validate_msg_fds(msg, 0) != 0) 292 return RTE_VHOST_MSG_RESULT_ERR; 293 294 rte_vhost_driver_get_queue_num(dev->ifname, &queue_num); 295 296 msg->payload.u64 = (uint64_t)queue_num; 297 msg->size = sizeof(msg->payload.u64); 298 msg->fd_num = 0; 299 300 return RTE_VHOST_MSG_RESULT_REPLY; 301 } 302 303 /* 304 * We receive the negotiated features supported by us and the virtio device. 305 */ 306 static int 307 vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, 308 int main_fd __rte_unused) 309 { 310 struct virtio_net *dev = *pdev; 311 uint64_t features = msg->payload.u64; 312 uint64_t vhost_features = 0; 313 struct rte_vdpa_device *vdpa_dev; 314 315 if (validate_msg_fds(msg, 0) != 0) 316 return RTE_VHOST_MSG_RESULT_ERR; 317 318 rte_vhost_driver_get_features(dev->ifname, &vhost_features); 319 if (features & ~vhost_features) { 320 VHOST_LOG_CONFIG(ERR, 321 "(%d) received invalid negotiated features.\n", 322 dev->vid); 323 dev->flags |= VIRTIO_DEV_FEATURES_FAILED; 324 dev->status &= ~VIRTIO_DEVICE_STATUS_FEATURES_OK; 325 326 return RTE_VHOST_MSG_RESULT_ERR; 327 } 328 329 if (dev->flags & VIRTIO_DEV_RUNNING) { 330 if (dev->features == features) 331 return RTE_VHOST_MSG_RESULT_OK; 332 333 /* 334 * Error out if master tries to change features while device is 335 * in running state. The exception being VHOST_F_LOG_ALL, which 336 * is enabled when the live-migration starts. 
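 *
 * For example (illustrative values only), dev->features = 0x110008000
 * and features = 0x114008000 differ only in bit 26 (VHOST_F_LOG_ALL),
 * so the change is accepted; any other differing bit makes the check
 * below reject the request.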
337 */ 338 if ((dev->features ^ features) & ~(1ULL << VHOST_F_LOG_ALL)) { 339 VHOST_LOG_CONFIG(ERR, 340 "(%d) features changed while device is running.\n", 341 dev->vid); 342 return RTE_VHOST_MSG_RESULT_ERR; 343 } 344 345 if (dev->notify_ops->features_changed) 346 dev->notify_ops->features_changed(dev->vid, features); 347 } 348 349 dev->features = features; 350 if (dev->features & 351 ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | 352 (1ULL << VIRTIO_F_VERSION_1) | 353 (1ULL << VIRTIO_F_RING_PACKED))) { 354 dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); 355 } else { 356 dev->vhost_hlen = sizeof(struct virtio_net_hdr); 357 } 358 VHOST_LOG_CONFIG(INFO, 359 "negotiated Virtio features: 0x%" PRIx64 "\n", dev->features); 360 VHOST_LOG_CONFIG(DEBUG, 361 "(%d) mergeable RX buffers %s, virtio 1 %s\n", 362 dev->vid, 363 (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", 364 (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); 365 366 if ((dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) && 367 !(dev->features & (1ULL << VIRTIO_NET_F_MQ))) { 368 /* 369 * Remove all but first queue pair if MQ hasn't been 370 * negotiated. This is safe because the device is not 371 * running at this stage. 372 */ 373 while (dev->nr_vring > 2) { 374 struct vhost_virtqueue *vq; 375 376 vq = dev->virtqueue[--dev->nr_vring]; 377 if (!vq) 378 continue; 379 380 dev->virtqueue[dev->nr_vring] = NULL; 381 cleanup_vq(vq, 1); 382 cleanup_vq_inflight(dev, vq); 383 free_vq(dev, vq); 384 } 385 } 386 387 vdpa_dev = dev->vdpa_dev; 388 if (vdpa_dev) 389 vdpa_dev->ops->set_features(dev->vid); 390 391 dev->flags &= ~VIRTIO_DEV_FEATURES_FAILED; 392 return RTE_VHOST_MSG_RESULT_OK; 393 } 394 395 /* 396 * The virtio device sends us the size of the descriptor ring. 397 */ 398 static int 399 vhost_user_set_vring_num(struct virtio_net **pdev, 400 struct VhostUserMsg *msg, 401 int main_fd __rte_unused) 402 { 403 struct virtio_net *dev = *pdev; 404 struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; 405 406 if (validate_msg_fds(msg, 0) != 0) 407 return RTE_VHOST_MSG_RESULT_ERR; 408 409 if (msg->payload.state.num > 32768) { 410 VHOST_LOG_CONFIG(ERR, "invalid virtqueue size %u\n", msg->payload.state.num); 411 return RTE_VHOST_MSG_RESULT_ERR; 412 } 413 414 vq->size = msg->payload.state.num; 415 416 /* VIRTIO 1.0, 2.4 Virtqueues says: 417 * 418 * Queue Size value is always a power of 2. The maximum Queue Size 419 * value is 32768. 420 * 421 * VIRTIO 1.1 2.7 Virtqueues says: 422 * 423 * Packed virtqueues support up to 2^15 entries each. 
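 *
 * The split-ring check below uses the usual power-of-two bit trick:
 * e.g. 256 & 255 == 0 (accepted) while 320 & 319 == 256 != 0
 * (rejected), because only a power of two shares no bit with
 * (size - 1).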
424 */ 425 if (!vq_is_packed(dev)) { 426 if (vq->size & (vq->size - 1)) { 427 VHOST_LOG_CONFIG(ERR, 428 "invalid virtqueue size %u\n", vq->size); 429 return RTE_VHOST_MSG_RESULT_ERR; 430 } 431 } 432 433 if (vq_is_packed(dev)) { 434 if (vq->shadow_used_packed) 435 rte_free(vq->shadow_used_packed); 436 vq->shadow_used_packed = rte_malloc(NULL, 437 vq->size * 438 sizeof(struct vring_used_elem_packed), 439 RTE_CACHE_LINE_SIZE); 440 if (!vq->shadow_used_packed) { 441 VHOST_LOG_CONFIG(ERR, 442 "failed to allocate memory for shadow used ring.\n"); 443 return RTE_VHOST_MSG_RESULT_ERR; 444 } 445 446 } else { 447 if (vq->shadow_used_split) 448 rte_free(vq->shadow_used_split); 449 450 vq->shadow_used_split = rte_malloc(NULL, 451 vq->size * sizeof(struct vring_used_elem), 452 RTE_CACHE_LINE_SIZE); 453 454 if (!vq->shadow_used_split) { 455 VHOST_LOG_CONFIG(ERR, 456 "failed to allocate memory for vq internal data.\n"); 457 return RTE_VHOST_MSG_RESULT_ERR; 458 } 459 } 460 461 if (vq->batch_copy_elems) 462 rte_free(vq->batch_copy_elems); 463 vq->batch_copy_elems = rte_malloc(NULL, 464 vq->size * sizeof(struct batch_copy_elem), 465 RTE_CACHE_LINE_SIZE); 466 if (!vq->batch_copy_elems) { 467 VHOST_LOG_CONFIG(ERR, 468 "failed to allocate memory for batching copy.\n"); 469 return RTE_VHOST_MSG_RESULT_ERR; 470 } 471 472 return RTE_VHOST_MSG_RESULT_OK; 473 } 474 475 /* 476 * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the 477 * same numa node as the memory of vring descriptor. 478 */ 479 #ifdef RTE_LIBRTE_VHOST_NUMA 480 static struct virtio_net* 481 numa_realloc(struct virtio_net *dev, int index) 482 { 483 int oldnode, newnode; 484 struct virtio_net *old_dev; 485 struct vhost_virtqueue *old_vq, *vq; 486 struct vring_used_elem *new_shadow_used_split; 487 struct vring_used_elem_packed *new_shadow_used_packed; 488 struct batch_copy_elem *new_batch_copy_elems; 489 int ret; 490 491 if (dev->flags & VIRTIO_DEV_RUNNING) 492 return dev; 493 494 old_dev = dev; 495 vq = old_vq = dev->virtqueue[index]; 496 497 ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, 498 MPOL_F_NODE | MPOL_F_ADDR); 499 500 /* check if we need to reallocate vq */ 501 ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, 502 MPOL_F_NODE | MPOL_F_ADDR); 503 if (ret) { 504 VHOST_LOG_CONFIG(ERR, 505 "Unable to get vq numa information.\n"); 506 return dev; 507 } 508 if (oldnode != newnode) { 509 VHOST_LOG_CONFIG(INFO, 510 "reallocate vq from %d to %d node\n", oldnode, newnode); 511 vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode); 512 if (!vq) 513 return dev; 514 515 memcpy(vq, old_vq, sizeof(*vq)); 516 517 if (vq_is_packed(dev)) { 518 new_shadow_used_packed = rte_malloc_socket(NULL, 519 vq->size * 520 sizeof(struct vring_used_elem_packed), 521 RTE_CACHE_LINE_SIZE, 522 newnode); 523 if (new_shadow_used_packed) { 524 rte_free(vq->shadow_used_packed); 525 vq->shadow_used_packed = new_shadow_used_packed; 526 } 527 } else { 528 new_shadow_used_split = rte_malloc_socket(NULL, 529 vq->size * 530 sizeof(struct vring_used_elem), 531 RTE_CACHE_LINE_SIZE, 532 newnode); 533 if (new_shadow_used_split) { 534 rte_free(vq->shadow_used_split); 535 vq->shadow_used_split = new_shadow_used_split; 536 } 537 } 538 539 new_batch_copy_elems = rte_malloc_socket(NULL, 540 vq->size * sizeof(struct batch_copy_elem), 541 RTE_CACHE_LINE_SIZE, 542 newnode); 543 if (new_batch_copy_elems) { 544 rte_free(vq->batch_copy_elems); 545 vq->batch_copy_elems = new_batch_copy_elems; 546 } 547 548 rte_free(old_vq); 549 } 550 551 /* check if we need to reallocate 
dev */ 552 ret = get_mempolicy(&oldnode, NULL, 0, old_dev, 553 MPOL_F_NODE | MPOL_F_ADDR); 554 if (ret) { 555 VHOST_LOG_CONFIG(ERR, 556 "Unable to get dev numa information.\n"); 557 goto out; 558 } 559 if (oldnode != newnode) { 560 VHOST_LOG_CONFIG(INFO, 561 "reallocate dev from %d to %d node\n", 562 oldnode, newnode); 563 dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); 564 if (!dev) { 565 dev = old_dev; 566 goto out; 567 } 568 569 memcpy(dev, old_dev, sizeof(*dev)); 570 rte_free(old_dev); 571 } 572 573 out: 574 dev->virtqueue[index] = vq; 575 vhost_devices[dev->vid] = dev; 576 577 if (old_vq != vq) 578 vhost_user_iotlb_init(dev, index); 579 580 return dev; 581 } 582 #else 583 static struct virtio_net* 584 numa_realloc(struct virtio_net *dev, int index __rte_unused) 585 { 586 return dev; 587 } 588 #endif 589 590 /* Converts QEMU virtual address to Vhost virtual address. */ 591 static uint64_t 592 qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) 593 { 594 struct rte_vhost_mem_region *r; 595 uint32_t i; 596 597 if (unlikely(!dev || !dev->mem)) 598 goto out_error; 599 600 /* Find the region where the address lives. */ 601 for (i = 0; i < dev->mem->nregions; i++) { 602 r = &dev->mem->regions[i]; 603 604 if (qva >= r->guest_user_addr && 605 qva < r->guest_user_addr + r->size) { 606 607 if (unlikely(*len > r->guest_user_addr + r->size - qva)) 608 *len = r->guest_user_addr + r->size - qva; 609 610 return qva - r->guest_user_addr + 611 r->host_user_addr; 612 } 613 } 614 out_error: 615 *len = 0; 616 617 return 0; 618 } 619 620 621 /* 622 * Converts ring address to Vhost virtual address. 623 * If IOMMU is enabled, the ring address is a guest IO virtual address, 624 * else it is a QEMU virtual address. 625 */ 626 static uint64_t 627 ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, 628 uint64_t ra, uint64_t *size) 629 { 630 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { 631 uint64_t vva; 632 633 vhost_user_iotlb_rd_lock(vq); 634 vva = vhost_iova_to_vva(dev, vq, ra, 635 size, VHOST_ACCESS_RW); 636 vhost_user_iotlb_rd_unlock(vq); 637 638 return vva; 639 } 640 641 return qva_to_vva(dev, ra, size); 642 } 643 644 static uint64_t 645 log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) 646 { 647 uint64_t log_gpa; 648 649 vhost_user_iotlb_rd_lock(vq); 650 log_gpa = translate_log_addr(dev, vq, vq->ring_addrs.log_guest_addr); 651 vhost_user_iotlb_rd_unlock(vq); 652 653 return log_gpa; 654 } 655 656 static struct virtio_net * 657 translate_ring_addresses(struct virtio_net *dev, int vq_index) 658 { 659 struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; 660 struct vhost_vring_addr *addr = &vq->ring_addrs; 661 uint64_t len, expected_len; 662 663 if (addr->flags & (1 << VHOST_VRING_F_LOG)) { 664 vq->log_guest_addr = 665 log_addr_to_gpa(dev, vq); 666 if (vq->log_guest_addr == 0) { 667 VHOST_LOG_CONFIG(DEBUG, 668 "(%d) failed to map log_guest_addr.\n", 669 dev->vid); 670 return dev; 671 } 672 } 673 674 if (vq_is_packed(dev)) { 675 len = sizeof(struct vring_packed_desc) * vq->size; 676 vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) 677 ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len); 678 if (vq->desc_packed == NULL || 679 len != sizeof(struct vring_packed_desc) * 680 vq->size) { 681 VHOST_LOG_CONFIG(DEBUG, 682 "(%d) failed to map desc_packed ring.\n", 683 dev->vid); 684 return dev; 685 } 686 687 dev = numa_realloc(dev, vq_index); 688 vq = dev->virtqueue[vq_index]; 689 addr = &vq->ring_addrs; 690 691 len = sizeof(struct 
vring_packed_desc_event); 692 vq->driver_event = (struct vring_packed_desc_event *) 693 (uintptr_t)ring_addr_to_vva(dev, 694 vq, addr->avail_user_addr, &len); 695 if (vq->driver_event == NULL || 696 len != sizeof(struct vring_packed_desc_event)) { 697 VHOST_LOG_CONFIG(DEBUG, 698 "(%d) failed to find driver area address.\n", 699 dev->vid); 700 return dev; 701 } 702 703 len = sizeof(struct vring_packed_desc_event); 704 vq->device_event = (struct vring_packed_desc_event *) 705 (uintptr_t)ring_addr_to_vva(dev, 706 vq, addr->used_user_addr, &len); 707 if (vq->device_event == NULL || 708 len != sizeof(struct vring_packed_desc_event)) { 709 VHOST_LOG_CONFIG(DEBUG, 710 "(%d) failed to find device area address.\n", 711 dev->vid); 712 return dev; 713 } 714 715 vq->access_ok = true; 716 return dev; 717 } 718 719 /* The addresses are converted from QEMU virtual to Vhost virtual. */ 720 if (vq->desc && vq->avail && vq->used) 721 return dev; 722 723 len = sizeof(struct vring_desc) * vq->size; 724 vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, 725 vq, addr->desc_user_addr, &len); 726 if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { 727 VHOST_LOG_CONFIG(DEBUG, 728 "(%d) failed to map desc ring.\n", 729 dev->vid); 730 return dev; 731 } 732 733 dev = numa_realloc(dev, vq_index); 734 vq = dev->virtqueue[vq_index]; 735 addr = &vq->ring_addrs; 736 737 len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; 738 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) 739 len += sizeof(uint16_t); 740 expected_len = len; 741 vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, 742 vq, addr->avail_user_addr, &len); 743 if (vq->avail == 0 || len != expected_len) { 744 VHOST_LOG_CONFIG(DEBUG, 745 "(%d) failed to map avail ring.\n", 746 dev->vid); 747 return dev; 748 } 749 750 len = sizeof(struct vring_used) + 751 sizeof(struct vring_used_elem) * vq->size; 752 if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) 753 len += sizeof(uint16_t); 754 expected_len = len; 755 vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, 756 vq, addr->used_user_addr, &len); 757 if (vq->used == 0 || len != expected_len) { 758 VHOST_LOG_CONFIG(DEBUG, 759 "(%d) failed to map used ring.\n", 760 dev->vid); 761 return dev; 762 } 763 764 if (vq->last_used_idx != vq->used->idx) { 765 VHOST_LOG_CONFIG(WARNING, 766 "last_used_idx (%u) and vq->used->idx (%u) mismatches; " 767 "some packets maybe resent for Tx and dropped for Rx\n", 768 vq->last_used_idx, vq->used->idx); 769 vq->last_used_idx = vq->used->idx; 770 vq->last_avail_idx = vq->used->idx; 771 } 772 773 vq->access_ok = true; 774 775 VHOST_LOG_CONFIG(DEBUG, "(%d) mapped address desc: %p\n", 776 dev->vid, vq->desc); 777 VHOST_LOG_CONFIG(DEBUG, "(%d) mapped address avail: %p\n", 778 dev->vid, vq->avail); 779 VHOST_LOG_CONFIG(DEBUG, "(%d) mapped address used: %p\n", 780 dev->vid, vq->used); 781 VHOST_LOG_CONFIG(DEBUG, "(%d) log_guest_addr: %" PRIx64 "\n", 782 dev->vid, vq->log_guest_addr); 783 784 return dev; 785 } 786 787 /* 788 * The virtio device sends us the desc, used and avail ring addresses. 789 * This function then converts these to our address space. 
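 *
 * For reference, the conversion performed by qva_to_vva() above boils
 * down to, for the region containing qva:
 *
 *   vva = qva - r->guest_user_addr + r->host_user_addr;
 *
 * with the returned length clamped so it never crosses the region end.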
790 */ 791 static int 792 vhost_user_set_vring_addr(struct virtio_net **pdev, struct VhostUserMsg *msg, 793 int main_fd __rte_unused) 794 { 795 struct virtio_net *dev = *pdev; 796 struct vhost_virtqueue *vq; 797 struct vhost_vring_addr *addr = &msg->payload.addr; 798 bool access_ok; 799 800 if (validate_msg_fds(msg, 0) != 0) 801 return RTE_VHOST_MSG_RESULT_ERR; 802 803 if (dev->mem == NULL) 804 return RTE_VHOST_MSG_RESULT_ERR; 805 806 /* addr->index refers to the queue index. The txq 1, rxq is 0. */ 807 vq = dev->virtqueue[msg->payload.addr.index]; 808 809 access_ok = vq->access_ok; 810 811 /* 812 * Rings addresses should not be interpreted as long as the ring is not 813 * started and enabled 814 */ 815 memcpy(&vq->ring_addrs, addr, sizeof(*addr)); 816 817 vring_invalidate(dev, vq); 818 819 if ((vq->enabled && (dev->features & 820 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) || 821 access_ok) { 822 dev = translate_ring_addresses(dev, msg->payload.addr.index); 823 if (!dev) 824 return RTE_VHOST_MSG_RESULT_ERR; 825 826 *pdev = dev; 827 } 828 829 return RTE_VHOST_MSG_RESULT_OK; 830 } 831 832 /* 833 * The virtio device sends us the available ring last used index. 834 */ 835 static int 836 vhost_user_set_vring_base(struct virtio_net **pdev, 837 struct VhostUserMsg *msg, 838 int main_fd __rte_unused) 839 { 840 struct virtio_net *dev = *pdev; 841 struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; 842 uint64_t val = msg->payload.state.num; 843 844 if (validate_msg_fds(msg, 0) != 0) 845 return RTE_VHOST_MSG_RESULT_ERR; 846 847 if (vq_is_packed(dev)) { 848 /* 849 * Bit[0:14]: avail index 850 * Bit[15]: avail wrap counter 851 */ 852 vq->last_avail_idx = val & 0x7fff; 853 vq->avail_wrap_counter = !!(val & (0x1 << 15)); 854 /* 855 * Set used index to same value as available one, as 856 * their values should be the same since ring processing 857 * was stopped at get time. 
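 *
 * Example (hypothetical value): num = 0x80ff yields
 * last_avail_idx = last_used_idx = 0x00ff and both wrap counters
 * set to 1.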
858 */ 859 vq->last_used_idx = vq->last_avail_idx; 860 vq->used_wrap_counter = vq->avail_wrap_counter; 861 } else { 862 vq->last_used_idx = msg->payload.state.num; 863 vq->last_avail_idx = msg->payload.state.num; 864 } 865 866 return RTE_VHOST_MSG_RESULT_OK; 867 } 868 869 static int 870 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, 871 uint64_t host_phys_addr, uint64_t size) 872 { 873 struct guest_page *page, *last_page; 874 struct guest_page *old_pages; 875 876 if (dev->nr_guest_pages == dev->max_guest_pages) { 877 dev->max_guest_pages *= 2; 878 old_pages = dev->guest_pages; 879 dev->guest_pages = rte_realloc(dev->guest_pages, 880 dev->max_guest_pages * sizeof(*page), 881 RTE_CACHE_LINE_SIZE); 882 if (dev->guest_pages == NULL) { 883 VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n"); 884 rte_free(old_pages); 885 return -1; 886 } 887 } 888 889 if (dev->nr_guest_pages > 0) { 890 last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; 891 /* merge if the two pages are continuous */ 892 if (host_phys_addr == last_page->host_phys_addr + 893 last_page->size) { 894 last_page->size += size; 895 return 0; 896 } 897 } 898 899 page = &dev->guest_pages[dev->nr_guest_pages++]; 900 page->guest_phys_addr = guest_phys_addr; 901 page->host_phys_addr = host_phys_addr; 902 page->size = size; 903 904 return 0; 905 } 906 907 static int 908 add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, 909 uint64_t page_size) 910 { 911 uint64_t reg_size = reg->size; 912 uint64_t host_user_addr = reg->host_user_addr; 913 uint64_t guest_phys_addr = reg->guest_phys_addr; 914 uint64_t host_phys_addr; 915 uint64_t size; 916 917 host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr); 918 size = page_size - (guest_phys_addr & (page_size - 1)); 919 size = RTE_MIN(size, reg_size); 920 921 if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) 922 return -1; 923 924 host_user_addr += size; 925 guest_phys_addr += size; 926 reg_size -= size; 927 928 while (reg_size > 0) { 929 size = RTE_MIN(reg_size, page_size); 930 host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) 931 host_user_addr); 932 if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, 933 size) < 0) 934 return -1; 935 936 host_user_addr += size; 937 guest_phys_addr += size; 938 reg_size -= size; 939 } 940 941 /* sort guest page array if over binary search threshold */ 942 if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) { 943 qsort((void *)dev->guest_pages, dev->nr_guest_pages, 944 sizeof(struct guest_page), guest_page_addrcmp); 945 } 946 947 return 0; 948 } 949 950 #ifdef RTE_LIBRTE_VHOST_DEBUG 951 /* TODO: enable it only in debug mode? 
 */
static void
dump_guest_pages(struct virtio_net *dev)
{
	uint32_t i;
	struct guest_page *page;

	for (i = 0; i < dev->nr_guest_pages; i++) {
		page = &dev->guest_pages[i];

		VHOST_LOG_CONFIG(INFO,
			"guest physical page region %u\n"
			"\t guest_phys_addr: %" PRIx64 "\n"
			"\t host_phys_addr : %" PRIx64 "\n"
			"\t size : %" PRIx64 "\n",
			i,
			page->guest_phys_addr,
			page->host_phys_addr,
			page->size);
	}
}
#else
#define dump_guest_pages(dev)
#endif

static bool
vhost_memory_changed(struct VhostUserMemory *new,
		     struct rte_vhost_memory *old)
{
	uint32_t i;

	if (new->nregions != old->nregions)
		return true;

	for (i = 0; i < new->nregions; ++i) {
		VhostUserMemoryRegion *new_r = &new->regions[i];
		struct rte_vhost_mem_region *old_r = &old->regions[i];

		if (new_r->guest_phys_addr != old_r->guest_phys_addr)
			return true;
		if (new_r->memory_size != old_r->size)
			return true;
		if (new_r->userspace_addr != old_r->guest_user_addr)
			return true;
	}

	return false;
}

#ifdef RTE_LIBRTE_VHOST_POSTCOPY
static int
vhost_user_postcopy_region_register(struct virtio_net *dev,
		struct rte_vhost_mem_region *reg)
{
	struct uffdio_register reg_struct;

	/*
	 * Let's register all the mmap'ed area to ensure
	 * alignment on page boundary.
	 */
	reg_struct.range.start = (uint64_t)(uintptr_t)reg->mmap_addr;
	reg_struct.range.len = reg->mmap_size;
	reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

	if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
				&reg_struct)) {
		VHOST_LOG_CONFIG(ERR, "Failed to register ufd for region "
				"%" PRIx64 " - %" PRIx64 " (ufd = %d) %s\n",
				(uint64_t)reg_struct.range.start,
				(uint64_t)reg_struct.range.start +
				(uint64_t)reg_struct.range.len - 1,
				dev->postcopy_ufd,
				strerror(errno));
		return -1;
	}

	VHOST_LOG_CONFIG(INFO, "\t userfaultfd registered for range : %" PRIx64 " - %" PRIx64 "\n",
			(uint64_t)reg_struct.range.start,
			(uint64_t)reg_struct.range.start +
			(uint64_t)reg_struct.range.len - 1);

	return 0;
}
#else
static int
vhost_user_postcopy_region_register(struct virtio_net *dev __rte_unused,
		struct rte_vhost_mem_region *reg __rte_unused)
{
	return -1;
}
#endif

static int
vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,
		struct VhostUserMsg *msg)
{
	struct VhostUserMemory *memory;
	struct rte_vhost_mem_region *reg;
	VhostUserMsg ack_msg;
	uint32_t i;

	if (!dev->postcopy_listening)
		return 0;

	/*
	 * We don't have a better way right now than sharing
	 * DPDK's virtual address with Qemu, so that Qemu can
	 * retrieve the region offset when handling userfaults.
	 */
	memory = &msg->payload.memory;
	for (i = 0; i < memory->nregions; i++) {
		reg = &dev->mem->regions[i];
		memory->regions[i].userspace_addr = reg->host_user_addr;
	}

	/* Send the addresses back to qemu */
	msg->fd_num = 0;
	send_vhost_reply(main_fd, msg);

	/* Wait for qemu to acknowledge it's got the addresses;
	 * we've got to wait before we're allowed to generate faults.
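	 *
	 * The handshake implemented below is therefore:
	 *   1. reply to SET_MEM_TABLE with regions[i].userspace_addr
	 *      rewritten to our host virtual addresses;
	 *   2. block in read_vhost_message() until the master sends
	 *      another SET_MEM_TABLE header as acknowledgement;
	 *   3. only then register each region with userfaultfd.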
1072 */ 1073 if (read_vhost_message(main_fd, &ack_msg) <= 0) { 1074 VHOST_LOG_CONFIG(ERR, 1075 "Failed to read qemu ack on postcopy set-mem-table\n"); 1076 return -1; 1077 } 1078 1079 if (validate_msg_fds(&ack_msg, 0) != 0) 1080 return -1; 1081 1082 if (ack_msg.request.master != VHOST_USER_SET_MEM_TABLE) { 1083 VHOST_LOG_CONFIG(ERR, 1084 "Bad qemu ack on postcopy set-mem-table (%d)\n", 1085 ack_msg.request.master); 1086 return -1; 1087 } 1088 1089 /* Now userfault register and we can use the memory */ 1090 for (i = 0; i < memory->nregions; i++) { 1091 reg = &dev->mem->regions[i]; 1092 if (vhost_user_postcopy_region_register(dev, reg) < 0) 1093 return -1; 1094 } 1095 1096 return 0; 1097 } 1098 1099 static int 1100 vhost_user_mmap_region(struct virtio_net *dev, 1101 struct rte_vhost_mem_region *region, 1102 uint64_t mmap_offset) 1103 { 1104 void *mmap_addr; 1105 uint64_t mmap_size; 1106 uint64_t alignment; 1107 int populate; 1108 1109 /* Check for memory_size + mmap_offset overflow */ 1110 if (mmap_offset >= -region->size) { 1111 VHOST_LOG_CONFIG(ERR, 1112 "mmap_offset (%#"PRIx64") and memory_size " 1113 "(%#"PRIx64") overflow\n", 1114 mmap_offset, region->size); 1115 return -1; 1116 } 1117 1118 mmap_size = region->size + mmap_offset; 1119 1120 /* mmap() without flag of MAP_ANONYMOUS, should be called with length 1121 * argument aligned with hugepagesz at older longterm version Linux, 1122 * like 2.6.32 and 3.2.72, or mmap() will fail with EINVAL. 1123 * 1124 * To avoid failure, make sure in caller to keep length aligned. 1125 */ 1126 alignment = get_blk_size(region->fd); 1127 if (alignment == (uint64_t)-1) { 1128 VHOST_LOG_CONFIG(ERR, 1129 "couldn't get hugepage size through fstat\n"); 1130 return -1; 1131 } 1132 mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); 1133 if (mmap_size == 0) { 1134 /* 1135 * It could happen if initial mmap_size + alignment overflows 1136 * the sizeof uint64, which could happen if either mmap_size or 1137 * alignment value is wrong. 1138 * 1139 * mmap() kernel implementation would return an error, but 1140 * better catch it before and provide useful info in the logs. 1141 */ 1142 VHOST_LOG_CONFIG(ERR, "mmap size (0x%" PRIx64 ") " 1143 "or alignment (0x%" PRIx64 ") is invalid\n", 1144 region->size + mmap_offset, alignment); 1145 return -1; 1146 } 1147 1148 populate = dev->async_copy ? 
MAP_POPULATE : 0; 1149 mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1150 MAP_SHARED | populate, region->fd, 0); 1151 1152 if (mmap_addr == MAP_FAILED) { 1153 VHOST_LOG_CONFIG(ERR, "mmap failed (%s).\n", strerror(errno)); 1154 return -1; 1155 } 1156 1157 region->mmap_addr = mmap_addr; 1158 region->mmap_size = mmap_size; 1159 region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset; 1160 1161 if (dev->async_copy) 1162 if (add_guest_pages(dev, region, alignment) < 0) { 1163 VHOST_LOG_CONFIG(ERR, 1164 "adding guest pages to region failed.\n"); 1165 return -1; 1166 } 1167 1168 VHOST_LOG_CONFIG(INFO, 1169 "guest memory region size: 0x%" PRIx64 "\n" 1170 "\t guest physical addr: 0x%" PRIx64 "\n" 1171 "\t guest virtual addr: 0x%" PRIx64 "\n" 1172 "\t host virtual addr: 0x%" PRIx64 "\n" 1173 "\t mmap addr : 0x%" PRIx64 "\n" 1174 "\t mmap size : 0x%" PRIx64 "\n" 1175 "\t mmap align: 0x%" PRIx64 "\n" 1176 "\t mmap off : 0x%" PRIx64 "\n", 1177 region->size, 1178 region->guest_phys_addr, 1179 region->guest_user_addr, 1180 region->host_user_addr, 1181 (uint64_t)(uintptr_t)mmap_addr, 1182 mmap_size, 1183 alignment, 1184 mmap_offset); 1185 1186 return 0; 1187 } 1188 1189 static int 1190 vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, 1191 int main_fd) 1192 { 1193 struct virtio_net *dev = *pdev; 1194 struct VhostUserMemory *memory = &msg->payload.memory; 1195 struct rte_vhost_mem_region *reg; 1196 1197 uint64_t mmap_offset; 1198 uint32_t i; 1199 1200 if (validate_msg_fds(msg, memory->nregions) != 0) 1201 return RTE_VHOST_MSG_RESULT_ERR; 1202 1203 if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) { 1204 VHOST_LOG_CONFIG(ERR, 1205 "too many memory regions (%u)\n", memory->nregions); 1206 goto close_msg_fds; 1207 } 1208 1209 if (dev->mem && !vhost_memory_changed(memory, dev->mem)) { 1210 VHOST_LOG_CONFIG(INFO, 1211 "(%d) memory regions not changed\n", dev->vid); 1212 1213 close_msg_fds(msg); 1214 1215 return RTE_VHOST_MSG_RESULT_OK; 1216 } 1217 1218 if (dev->mem) { 1219 if (dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) { 1220 struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev; 1221 1222 if (vdpa_dev && vdpa_dev->ops->dev_close) 1223 vdpa_dev->ops->dev_close(dev->vid); 1224 dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; 1225 } 1226 free_mem_region(dev); 1227 rte_free(dev->mem); 1228 dev->mem = NULL; 1229 } 1230 1231 /* Flush IOTLB cache as previous HVAs are now invalid */ 1232 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) 1233 for (i = 0; i < dev->nr_vring; i++) 1234 vhost_user_iotlb_flush_all(dev->virtqueue[i]); 1235 1236 dev->nr_guest_pages = 0; 1237 if (dev->guest_pages == NULL) { 1238 dev->max_guest_pages = 8; 1239 dev->guest_pages = rte_zmalloc(NULL, 1240 dev->max_guest_pages * 1241 sizeof(struct guest_page), 1242 RTE_CACHE_LINE_SIZE); 1243 if (dev->guest_pages == NULL) { 1244 VHOST_LOG_CONFIG(ERR, 1245 "(%d) failed to allocate memory " 1246 "for dev->guest_pages\n", 1247 dev->vid); 1248 goto close_msg_fds; 1249 } 1250 } 1251 1252 dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + 1253 sizeof(struct rte_vhost_mem_region) * memory->nregions, 0); 1254 if (dev->mem == NULL) { 1255 VHOST_LOG_CONFIG(ERR, 1256 "(%d) failed to allocate memory for dev->mem\n", 1257 dev->vid); 1258 goto free_guest_pages; 1259 } 1260 1261 for (i = 0; i < memory->nregions; i++) { 1262 reg = &dev->mem->regions[i]; 1263 1264 reg->guest_phys_addr = memory->regions[i].guest_phys_addr; 1265 reg->guest_user_addr = memory->regions[i].userspace_addr; 1266 
reg->size = memory->regions[i].memory_size; 1267 reg->fd = msg->fds[i]; 1268 1269 /* 1270 * Assign invalid file descriptor value to avoid double 1271 * closing on error path. 1272 */ 1273 msg->fds[i] = -1; 1274 1275 mmap_offset = memory->regions[i].mmap_offset; 1276 1277 if (vhost_user_mmap_region(dev, reg, mmap_offset) < 0) { 1278 VHOST_LOG_CONFIG(ERR, "Failed to mmap region %u\n", i); 1279 goto free_mem_table; 1280 } 1281 1282 dev->mem->nregions++; 1283 } 1284 1285 if (vhost_user_postcopy_register(dev, main_fd, msg) < 0) 1286 goto free_mem_table; 1287 1288 for (i = 0; i < dev->nr_vring; i++) { 1289 struct vhost_virtqueue *vq = dev->virtqueue[i]; 1290 1291 if (!vq) 1292 continue; 1293 1294 if (vq->desc || vq->avail || vq->used) { 1295 /* 1296 * If the memory table got updated, the ring addresses 1297 * need to be translated again as virtual addresses have 1298 * changed. 1299 */ 1300 vring_invalidate(dev, vq); 1301 1302 dev = translate_ring_addresses(dev, i); 1303 if (!dev) { 1304 dev = *pdev; 1305 goto free_mem_table; 1306 } 1307 1308 *pdev = dev; 1309 } 1310 } 1311 1312 dump_guest_pages(dev); 1313 1314 return RTE_VHOST_MSG_RESULT_OK; 1315 1316 free_mem_table: 1317 free_mem_region(dev); 1318 rte_free(dev->mem); 1319 dev->mem = NULL; 1320 free_guest_pages: 1321 rte_free(dev->guest_pages); 1322 dev->guest_pages = NULL; 1323 close_msg_fds: 1324 close_msg_fds(msg); 1325 return RTE_VHOST_MSG_RESULT_ERR; 1326 } 1327 1328 static bool 1329 vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq) 1330 { 1331 bool rings_ok; 1332 1333 if (!vq) 1334 return false; 1335 1336 if (vq_is_packed(dev)) 1337 rings_ok = vq->desc_packed && vq->driver_event && 1338 vq->device_event; 1339 else 1340 rings_ok = vq->desc && vq->avail && vq->used; 1341 1342 return rings_ok && 1343 vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && 1344 vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD && 1345 vq->enabled; 1346 } 1347 1348 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u 1349 1350 static int 1351 virtio_is_ready(struct virtio_net *dev) 1352 { 1353 struct vhost_virtqueue *vq; 1354 uint32_t i, nr_vring = dev->nr_vring; 1355 1356 if (dev->flags & VIRTIO_DEV_READY) 1357 return 1; 1358 1359 if (!dev->nr_vring) 1360 return 0; 1361 1362 if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { 1363 nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; 1364 1365 if (dev->nr_vring < nr_vring) 1366 return 0; 1367 } 1368 1369 for (i = 0; i < nr_vring; i++) { 1370 vq = dev->virtqueue[i]; 1371 1372 if (!vq_is_ready(dev, vq)) 1373 return 0; 1374 } 1375 1376 /* If supported, ensure the frontend is really done with config */ 1377 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)) 1378 if (!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)) 1379 return 0; 1380 1381 dev->flags |= VIRTIO_DEV_READY; 1382 1383 if (!(dev->flags & VIRTIO_DEV_RUNNING)) 1384 VHOST_LOG_CONFIG(INFO, 1385 "virtio is now ready for processing.\n"); 1386 return 1; 1387 } 1388 1389 static void * 1390 inflight_mem_alloc(const char *name, size_t size, int *fd) 1391 { 1392 void *ptr; 1393 int mfd = -1; 1394 char fname[20] = "/tmp/memfd-XXXXXX"; 1395 1396 *fd = -1; 1397 #ifdef MEMFD_SUPPORTED 1398 mfd = memfd_create(name, MFD_CLOEXEC); 1399 #else 1400 RTE_SET_USED(name); 1401 #endif 1402 if (mfd == -1) { 1403 mfd = mkstemp(fname); 1404 if (mfd == -1) { 1405 VHOST_LOG_CONFIG(ERR, 1406 "failed to get inflight buffer fd\n"); 1407 return NULL; 1408 } 1409 1410 unlink(fname); 1411 } 1412 1413 if (ftruncate(mfd, size) == -1) { 1414 VHOST_LOG_CONFIG(ERR, 1415 "failed to alloc 
inflight buffer\n"); 1416 close(mfd); 1417 return NULL; 1418 } 1419 1420 ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); 1421 if (ptr == MAP_FAILED) { 1422 VHOST_LOG_CONFIG(ERR, 1423 "failed to mmap inflight buffer\n"); 1424 close(mfd); 1425 return NULL; 1426 } 1427 1428 *fd = mfd; 1429 return ptr; 1430 } 1431 1432 static uint32_t 1433 get_pervq_shm_size_split(uint16_t queue_size) 1434 { 1435 return RTE_ALIGN_MUL_CEIL(sizeof(struct rte_vhost_inflight_desc_split) * 1436 queue_size + sizeof(uint64_t) + 1437 sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); 1438 } 1439 1440 static uint32_t 1441 get_pervq_shm_size_packed(uint16_t queue_size) 1442 { 1443 return RTE_ALIGN_MUL_CEIL(sizeof(struct rte_vhost_inflight_desc_packed) 1444 * queue_size + sizeof(uint64_t) + 1445 sizeof(uint16_t) * 6 + sizeof(uint8_t) * 9, 1446 INFLIGHT_ALIGNMENT); 1447 } 1448 1449 static int 1450 vhost_user_get_inflight_fd(struct virtio_net **pdev, 1451 VhostUserMsg *msg, 1452 int main_fd __rte_unused) 1453 { 1454 struct rte_vhost_inflight_info_packed *inflight_packed; 1455 uint64_t pervq_inflight_size, mmap_size; 1456 uint16_t num_queues, queue_size; 1457 struct virtio_net *dev = *pdev; 1458 int fd, i, j; 1459 void *addr; 1460 1461 if (msg->size != sizeof(msg->payload.inflight)) { 1462 VHOST_LOG_CONFIG(ERR, 1463 "invalid get_inflight_fd message size is %d\n", 1464 msg->size); 1465 return RTE_VHOST_MSG_RESULT_ERR; 1466 } 1467 1468 if (dev->inflight_info == NULL) { 1469 dev->inflight_info = calloc(1, 1470 sizeof(struct inflight_mem_info)); 1471 if (!dev->inflight_info) { 1472 VHOST_LOG_CONFIG(ERR, 1473 "failed to alloc dev inflight area\n"); 1474 return RTE_VHOST_MSG_RESULT_ERR; 1475 } 1476 dev->inflight_info->fd = -1; 1477 } 1478 1479 num_queues = msg->payload.inflight.num_queues; 1480 queue_size = msg->payload.inflight.queue_size; 1481 1482 VHOST_LOG_CONFIG(INFO, "get_inflight_fd num_queues: %u\n", 1483 msg->payload.inflight.num_queues); 1484 VHOST_LOG_CONFIG(INFO, "get_inflight_fd queue_size: %u\n", 1485 msg->payload.inflight.queue_size); 1486 1487 if (vq_is_packed(dev)) 1488 pervq_inflight_size = get_pervq_shm_size_packed(queue_size); 1489 else 1490 pervq_inflight_size = get_pervq_shm_size_split(queue_size); 1491 1492 mmap_size = num_queues * pervq_inflight_size; 1493 addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); 1494 if (!addr) { 1495 VHOST_LOG_CONFIG(ERR, 1496 "failed to alloc vhost inflight area\n"); 1497 msg->payload.inflight.mmap_size = 0; 1498 return RTE_VHOST_MSG_RESULT_ERR; 1499 } 1500 memset(addr, 0, mmap_size); 1501 1502 if (dev->inflight_info->addr) { 1503 munmap(dev->inflight_info->addr, dev->inflight_info->size); 1504 dev->inflight_info->addr = NULL; 1505 } 1506 1507 if (dev->inflight_info->fd >= 0) { 1508 close(dev->inflight_info->fd); 1509 dev->inflight_info->fd = -1; 1510 } 1511 1512 dev->inflight_info->addr = addr; 1513 dev->inflight_info->size = msg->payload.inflight.mmap_size = mmap_size; 1514 dev->inflight_info->fd = msg->fds[0] = fd; 1515 msg->payload.inflight.mmap_offset = 0; 1516 msg->fd_num = 1; 1517 1518 if (vq_is_packed(dev)) { 1519 for (i = 0; i < num_queues; i++) { 1520 inflight_packed = 1521 (struct rte_vhost_inflight_info_packed *)addr; 1522 inflight_packed->used_wrap_counter = 1; 1523 inflight_packed->old_used_wrap_counter = 1; 1524 for (j = 0; j < queue_size; j++) 1525 inflight_packed->desc[j].next = j + 1; 1526 addr = (void *)((char *)addr + pervq_inflight_size); 1527 } 1528 } 1529 1530 VHOST_LOG_CONFIG(INFO, 1531 "send inflight mmap_size: %"PRIu64"\n", 1532 
msg->payload.inflight.mmap_size); 1533 VHOST_LOG_CONFIG(INFO, 1534 "send inflight mmap_offset: %"PRIu64"\n", 1535 msg->payload.inflight.mmap_offset); 1536 VHOST_LOG_CONFIG(INFO, 1537 "send inflight fd: %d\n", msg->fds[0]); 1538 1539 return RTE_VHOST_MSG_RESULT_REPLY; 1540 } 1541 1542 static int 1543 vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, 1544 int main_fd __rte_unused) 1545 { 1546 uint64_t mmap_size, mmap_offset; 1547 uint16_t num_queues, queue_size; 1548 struct virtio_net *dev = *pdev; 1549 uint32_t pervq_inflight_size; 1550 struct vhost_virtqueue *vq; 1551 void *addr; 1552 int fd, i; 1553 1554 fd = msg->fds[0]; 1555 if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { 1556 VHOST_LOG_CONFIG(ERR, 1557 "invalid set_inflight_fd message size is %d,fd is %d\n", 1558 msg->size, fd); 1559 return RTE_VHOST_MSG_RESULT_ERR; 1560 } 1561 1562 mmap_size = msg->payload.inflight.mmap_size; 1563 mmap_offset = msg->payload.inflight.mmap_offset; 1564 num_queues = msg->payload.inflight.num_queues; 1565 queue_size = msg->payload.inflight.queue_size; 1566 1567 if (vq_is_packed(dev)) 1568 pervq_inflight_size = get_pervq_shm_size_packed(queue_size); 1569 else 1570 pervq_inflight_size = get_pervq_shm_size_split(queue_size); 1571 1572 VHOST_LOG_CONFIG(INFO, 1573 "set_inflight_fd mmap_size: %"PRIu64"\n", mmap_size); 1574 VHOST_LOG_CONFIG(INFO, 1575 "set_inflight_fd mmap_offset: %"PRIu64"\n", mmap_offset); 1576 VHOST_LOG_CONFIG(INFO, 1577 "set_inflight_fd num_queues: %u\n", num_queues); 1578 VHOST_LOG_CONFIG(INFO, 1579 "set_inflight_fd queue_size: %u\n", queue_size); 1580 VHOST_LOG_CONFIG(INFO, 1581 "set_inflight_fd fd: %d\n", fd); 1582 VHOST_LOG_CONFIG(INFO, 1583 "set_inflight_fd pervq_inflight_size: %d\n", 1584 pervq_inflight_size); 1585 1586 if (!dev->inflight_info) { 1587 dev->inflight_info = calloc(1, 1588 sizeof(struct inflight_mem_info)); 1589 if (dev->inflight_info == NULL) { 1590 VHOST_LOG_CONFIG(ERR, 1591 "failed to alloc dev inflight area\n"); 1592 return RTE_VHOST_MSG_RESULT_ERR; 1593 } 1594 dev->inflight_info->fd = -1; 1595 } 1596 1597 if (dev->inflight_info->addr) { 1598 munmap(dev->inflight_info->addr, dev->inflight_info->size); 1599 dev->inflight_info->addr = NULL; 1600 } 1601 1602 addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1603 fd, mmap_offset); 1604 if (addr == MAP_FAILED) { 1605 VHOST_LOG_CONFIG(ERR, "failed to mmap share memory.\n"); 1606 return RTE_VHOST_MSG_RESULT_ERR; 1607 } 1608 1609 if (dev->inflight_info->fd >= 0) { 1610 close(dev->inflight_info->fd); 1611 dev->inflight_info->fd = -1; 1612 } 1613 1614 dev->inflight_info->fd = fd; 1615 dev->inflight_info->addr = addr; 1616 dev->inflight_info->size = mmap_size; 1617 1618 for (i = 0; i < num_queues; i++) { 1619 vq = dev->virtqueue[i]; 1620 if (!vq) 1621 continue; 1622 1623 if (vq_is_packed(dev)) { 1624 vq->inflight_packed = addr; 1625 vq->inflight_packed->desc_num = queue_size; 1626 } else { 1627 vq->inflight_split = addr; 1628 vq->inflight_split->desc_num = queue_size; 1629 } 1630 addr = (void *)((char *)addr + pervq_inflight_size); 1631 } 1632 1633 return RTE_VHOST_MSG_RESULT_OK; 1634 } 1635 1636 static int 1637 vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, 1638 int main_fd __rte_unused) 1639 { 1640 struct virtio_net *dev = *pdev; 1641 struct vhost_vring_file file; 1642 struct vhost_virtqueue *vq; 1643 int expected_fds; 1644 1645 expected_fds = (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ? 
0 : 1; 1646 if (validate_msg_fds(msg, expected_fds) != 0) 1647 return RTE_VHOST_MSG_RESULT_ERR; 1648 1649 file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; 1650 if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) 1651 file.fd = VIRTIO_INVALID_EVENTFD; 1652 else 1653 file.fd = msg->fds[0]; 1654 VHOST_LOG_CONFIG(INFO, 1655 "vring call idx:%d file:%d\n", file.index, file.fd); 1656 1657 vq = dev->virtqueue[file.index]; 1658 1659 if (vq->ready) { 1660 vq->ready = false; 1661 vhost_user_notify_queue_state(dev, file.index, 0); 1662 } 1663 1664 if (vq->callfd >= 0) 1665 close(vq->callfd); 1666 1667 vq->callfd = file.fd; 1668 1669 return RTE_VHOST_MSG_RESULT_OK; 1670 } 1671 1672 static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, 1673 struct VhostUserMsg *msg, 1674 int main_fd __rte_unused) 1675 { 1676 int expected_fds; 1677 1678 expected_fds = (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ? 0 : 1; 1679 if (validate_msg_fds(msg, expected_fds) != 0) 1680 return RTE_VHOST_MSG_RESULT_ERR; 1681 1682 if (!(msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) 1683 close(msg->fds[0]); 1684 VHOST_LOG_CONFIG(INFO, "not implemented\n"); 1685 1686 return RTE_VHOST_MSG_RESULT_OK; 1687 } 1688 1689 static int 1690 resubmit_desc_compare(const void *a, const void *b) 1691 { 1692 const struct rte_vhost_resubmit_desc *desc0 = a; 1693 const struct rte_vhost_resubmit_desc *desc1 = b; 1694 1695 if (desc1->counter > desc0->counter) 1696 return 1; 1697 1698 return -1; 1699 } 1700 1701 static int 1702 vhost_check_queue_inflights_split(struct virtio_net *dev, 1703 struct vhost_virtqueue *vq) 1704 { 1705 uint16_t i; 1706 uint16_t resubmit_num = 0, last_io, num; 1707 struct vring_used *used = vq->used; 1708 struct rte_vhost_resubmit_info *resubmit; 1709 struct rte_vhost_inflight_info_split *inflight_split; 1710 1711 if (!(dev->protocol_features & 1712 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) 1713 return RTE_VHOST_MSG_RESULT_OK; 1714 1715 /* The frontend may still not support the inflight feature 1716 * although we negotiate the protocol feature. 
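 *
 * When the region is present, the code below rebuilds the resubmit
 * state from it. Illustrative case: after a reconnect, descriptors 3
 * and 7 still have desc[i].inflight == 1, so both land in
 * resubmit_list; qsort() places the entry with the highest counter
 * first and vq->global_counter resumes at that counter + 1.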
1717 */ 1718 if ((!vq->inflight_split)) 1719 return RTE_VHOST_MSG_RESULT_OK; 1720 1721 if (!vq->inflight_split->version) { 1722 vq->inflight_split->version = INFLIGHT_VERSION; 1723 return RTE_VHOST_MSG_RESULT_OK; 1724 } 1725 1726 if (vq->resubmit_inflight) 1727 return RTE_VHOST_MSG_RESULT_OK; 1728 1729 inflight_split = vq->inflight_split; 1730 vq->global_counter = 0; 1731 last_io = inflight_split->last_inflight_io; 1732 1733 if (inflight_split->used_idx != used->idx) { 1734 inflight_split->desc[last_io].inflight = 0; 1735 rte_atomic_thread_fence(__ATOMIC_SEQ_CST); 1736 inflight_split->used_idx = used->idx; 1737 } 1738 1739 for (i = 0; i < inflight_split->desc_num; i++) { 1740 if (inflight_split->desc[i].inflight == 1) 1741 resubmit_num++; 1742 } 1743 1744 vq->last_avail_idx += resubmit_num; 1745 1746 if (resubmit_num) { 1747 resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info)); 1748 if (!resubmit) { 1749 VHOST_LOG_CONFIG(ERR, 1750 "failed to allocate memory for resubmit info.\n"); 1751 return RTE_VHOST_MSG_RESULT_ERR; 1752 } 1753 1754 resubmit->resubmit_list = calloc(resubmit_num, 1755 sizeof(struct rte_vhost_resubmit_desc)); 1756 if (!resubmit->resubmit_list) { 1757 VHOST_LOG_CONFIG(ERR, 1758 "failed to allocate memory for inflight desc.\n"); 1759 free(resubmit); 1760 return RTE_VHOST_MSG_RESULT_ERR; 1761 } 1762 1763 num = 0; 1764 for (i = 0; i < vq->inflight_split->desc_num; i++) { 1765 if (vq->inflight_split->desc[i].inflight == 1) { 1766 resubmit->resubmit_list[num].index = i; 1767 resubmit->resubmit_list[num].counter = 1768 inflight_split->desc[i].counter; 1769 num++; 1770 } 1771 } 1772 resubmit->resubmit_num = num; 1773 1774 if (resubmit->resubmit_num > 1) 1775 qsort(resubmit->resubmit_list, resubmit->resubmit_num, 1776 sizeof(struct rte_vhost_resubmit_desc), 1777 resubmit_desc_compare); 1778 1779 vq->global_counter = resubmit->resubmit_list[0].counter + 1; 1780 vq->resubmit_inflight = resubmit; 1781 } 1782 1783 return RTE_VHOST_MSG_RESULT_OK; 1784 } 1785 1786 static int 1787 vhost_check_queue_inflights_packed(struct virtio_net *dev, 1788 struct vhost_virtqueue *vq) 1789 { 1790 uint16_t i; 1791 uint16_t resubmit_num = 0, old_used_idx, num; 1792 struct rte_vhost_resubmit_info *resubmit; 1793 struct rte_vhost_inflight_info_packed *inflight_packed; 1794 1795 if (!(dev->protocol_features & 1796 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) 1797 return RTE_VHOST_MSG_RESULT_OK; 1798 1799 /* The frontend may still not support the inflight feature 1800 * although we negotiate the protocol feature. 
1801 */ 1802 if ((!vq->inflight_packed)) 1803 return RTE_VHOST_MSG_RESULT_OK; 1804 1805 if (!vq->inflight_packed->version) { 1806 vq->inflight_packed->version = INFLIGHT_VERSION; 1807 return RTE_VHOST_MSG_RESULT_OK; 1808 } 1809 1810 if (vq->resubmit_inflight) 1811 return RTE_VHOST_MSG_RESULT_OK; 1812 1813 inflight_packed = vq->inflight_packed; 1814 vq->global_counter = 0; 1815 old_used_idx = inflight_packed->old_used_idx; 1816 1817 if (inflight_packed->used_idx != old_used_idx) { 1818 if (inflight_packed->desc[old_used_idx].inflight == 0) { 1819 inflight_packed->old_used_idx = 1820 inflight_packed->used_idx; 1821 inflight_packed->old_used_wrap_counter = 1822 inflight_packed->used_wrap_counter; 1823 inflight_packed->old_free_head = 1824 inflight_packed->free_head; 1825 } else { 1826 inflight_packed->used_idx = 1827 inflight_packed->old_used_idx; 1828 inflight_packed->used_wrap_counter = 1829 inflight_packed->old_used_wrap_counter; 1830 inflight_packed->free_head = 1831 inflight_packed->old_free_head; 1832 } 1833 } 1834 1835 for (i = 0; i < inflight_packed->desc_num; i++) { 1836 if (inflight_packed->desc[i].inflight == 1) 1837 resubmit_num++; 1838 } 1839 1840 if (resubmit_num) { 1841 resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info)); 1842 if (resubmit == NULL) { 1843 VHOST_LOG_CONFIG(ERR, 1844 "failed to allocate memory for resubmit info.\n"); 1845 return RTE_VHOST_MSG_RESULT_ERR; 1846 } 1847 1848 resubmit->resubmit_list = calloc(resubmit_num, 1849 sizeof(struct rte_vhost_resubmit_desc)); 1850 if (resubmit->resubmit_list == NULL) { 1851 VHOST_LOG_CONFIG(ERR, 1852 "failed to allocate memory for resubmit desc.\n"); 1853 free(resubmit); 1854 return RTE_VHOST_MSG_RESULT_ERR; 1855 } 1856 1857 num = 0; 1858 for (i = 0; i < inflight_packed->desc_num; i++) { 1859 if (vq->inflight_packed->desc[i].inflight == 1) { 1860 resubmit->resubmit_list[num].index = i; 1861 resubmit->resubmit_list[num].counter = 1862 inflight_packed->desc[i].counter; 1863 num++; 1864 } 1865 } 1866 resubmit->resubmit_num = num; 1867 1868 if (resubmit->resubmit_num > 1) 1869 qsort(resubmit->resubmit_list, resubmit->resubmit_num, 1870 sizeof(struct rte_vhost_resubmit_desc), 1871 resubmit_desc_compare); 1872 1873 vq->global_counter = resubmit->resubmit_list[0].counter + 1; 1874 vq->resubmit_inflight = resubmit; 1875 } 1876 1877 return RTE_VHOST_MSG_RESULT_OK; 1878 } 1879 1880 static int 1881 vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, 1882 int main_fd __rte_unused) 1883 { 1884 struct virtio_net *dev = *pdev; 1885 struct vhost_vring_file file; 1886 struct vhost_virtqueue *vq; 1887 int expected_fds; 1888 1889 expected_fds = (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ? 0 : 1; 1890 if (validate_msg_fds(msg, expected_fds) != 0) 1891 return RTE_VHOST_MSG_RESULT_ERR; 1892 1893 file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; 1894 if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) 1895 file.fd = VIRTIO_INVALID_EVENTFD; 1896 else 1897 file.fd = msg->fds[0]; 1898 VHOST_LOG_CONFIG(INFO, 1899 "vring kick idx:%d file:%d\n", file.index, file.fd); 1900 1901 /* Interpret ring addresses only when ring is started. */ 1902 dev = translate_ring_addresses(dev, file.index); 1903 if (!dev) { 1904 if (file.fd != VIRTIO_INVALID_EVENTFD) 1905 close(file.fd); 1906 1907 return RTE_VHOST_MSG_RESULT_ERR; 1908 } 1909 1910 *pdev = dev; 1911 1912 vq = dev->virtqueue[file.index]; 1913 1914 /* 1915 * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated, 1916 * the ring starts already enabled. 
Otherwise, it is enabled via 1917 * the SET_VRING_ENABLE message. 1918 */ 1919 if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { 1920 vq->enabled = true; 1921 } 1922 1923 if (vq->ready) { 1924 vq->ready = false; 1925 vhost_user_notify_queue_state(dev, file.index, 0); 1926 } 1927 1928 if (vq->kickfd >= 0) 1929 close(vq->kickfd); 1930 vq->kickfd = file.fd; 1931 1932 if (vq_is_packed(dev)) { 1933 if (vhost_check_queue_inflights_packed(dev, vq)) { 1934 VHOST_LOG_CONFIG(ERR, 1935 "failed to inflights for vq: %d\n", file.index); 1936 return RTE_VHOST_MSG_RESULT_ERR; 1937 } 1938 } else { 1939 if (vhost_check_queue_inflights_split(dev, vq)) { 1940 VHOST_LOG_CONFIG(ERR, 1941 "failed to inflights for vq: %d\n", file.index); 1942 return RTE_VHOST_MSG_RESULT_ERR; 1943 } 1944 } 1945 1946 return RTE_VHOST_MSG_RESULT_OK; 1947 } 1948 1949 /* 1950 * when virtio is stopped, qemu will send us the GET_VRING_BASE message. 1951 */ 1952 static int 1953 vhost_user_get_vring_base(struct virtio_net **pdev, 1954 struct VhostUserMsg *msg, 1955 int main_fd __rte_unused) 1956 { 1957 struct virtio_net *dev = *pdev; 1958 struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; 1959 uint64_t val; 1960 1961 if (validate_msg_fds(msg, 0) != 0) 1962 return RTE_VHOST_MSG_RESULT_ERR; 1963 1964 /* We have to stop the queue (virtio) if it is running. */ 1965 vhost_destroy_device_notify(dev); 1966 1967 dev->flags &= ~VIRTIO_DEV_READY; 1968 dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; 1969 1970 /* Here we are safe to get the indexes */ 1971 if (vq_is_packed(dev)) { 1972 /* 1973 * Bit[0:14]: avail index 1974 * Bit[15]: avail wrap counter 1975 */ 1976 val = vq->last_avail_idx & 0x7fff; 1977 val |= vq->avail_wrap_counter << 15; 1978 msg->payload.state.num = val; 1979 } else { 1980 msg->payload.state.num = vq->last_avail_idx; 1981 } 1982 1983 VHOST_LOG_CONFIG(INFO, 1984 "vring base idx:%d file:%d\n", msg->payload.state.index, 1985 msg->payload.state.num); 1986 /* 1987 * Based on current qemu vhost-user implementation, this message is 1988 * sent and only sent in vhost_vring_stop. 1989 * TODO: cleanup the vring, it isn't usable since here. 1990 */ 1991 if (vq->kickfd >= 0) 1992 close(vq->kickfd); 1993 1994 vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; 1995 1996 if (vq->callfd >= 0) 1997 close(vq->callfd); 1998 1999 vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; 2000 2001 vq->signalled_used_valid = false; 2002 2003 if (vq_is_packed(dev)) { 2004 rte_free(vq->shadow_used_packed); 2005 vq->shadow_used_packed = NULL; 2006 } else { 2007 rte_free(vq->shadow_used_split); 2008 vq->shadow_used_split = NULL; 2009 } 2010 2011 rte_free(vq->batch_copy_elems); 2012 vq->batch_copy_elems = NULL; 2013 2014 rte_free(vq->log_cache); 2015 vq->log_cache = NULL; 2016 2017 msg->size = sizeof(msg->payload.state); 2018 msg->fd_num = 0; 2019 2020 vring_invalidate(dev, vq); 2021 2022 return RTE_VHOST_MSG_RESULT_REPLY; 2023 } 2024 2025 /* 2026 * when virtio queues are ready to work, qemu will send us to 2027 * enable the virtio queue pair. 
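 *
 * Applications see these transitions through the vring_state_changed
 * callback of struct vhost_device_ops (invoked from
 * vhost_user_notify_queue_state() above). Minimal sketch; the callback
 * name is a placeholder:
 *
 *   static int
 *   app_vring_state_changed(int vid, uint16_t queue_id, int enable)
 *   {
 *           return 0;    // start or stop polling queue_id for vid
 *   }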
/*
 * When virtio queues are ready to work, QEMU sends us the
 * SET_VRING_ENABLE message to enable the virtio queue pair.
 */
static int
vhost_user_set_vring_enable(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	bool enable = !!msg->payload.state.num;
	int index = (int)msg->payload.state.index;

	if (validate_msg_fds(msg, 0) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	VHOST_LOG_CONFIG(INFO,
		"set queue enable: %d to qp idx: %d\n",
		enable, index);

	if (enable && dev->virtqueue[index]->async_registered) {
		if (dev->virtqueue[index]->async_pkts_inflight_n) {
			VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
				"async inflight packets must be completed first\n");
			return RTE_VHOST_MSG_RESULT_ERR;
		}
	}

	dev->virtqueue[index]->enabled = enable;

	return RTE_VHOST_MSG_RESULT_OK;
}
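/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the queue state toggled above is reported to the application through the
 * vring_state_changed() callback of struct vhost_device_ops, registered
 * with rte_vhost_driver_callback_register(). The callback below is a
 * hypothetical example of what an application might install; it only logs
 * the transition.
 */
static int __rte_unused
example_vring_state_changed(int vid, uint16_t queue_id, int enable)
{
	VHOST_LOG_CONFIG(INFO,
		"example app callback: vid %d, queue %u %s\n",
		vid, (unsigned int)queue_id,
		enable ? "enabled" : "disabled");

	return 0;
}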
static int
vhost_user_get_protocol_features(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t features, protocol_features;

	if (validate_msg_fds(msg, 0) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	rte_vhost_driver_get_features(dev->ifname, &features);
	rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features);

	msg->payload.u64 = protocol_features;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}

static int
vhost_user_set_protocol_features(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t protocol_features = msg->payload.u64;
	uint64_t slave_protocol_features = 0;

	if (validate_msg_fds(msg, 0) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	rte_vhost_driver_get_protocol_features(dev->ifname,
			&slave_protocol_features);
	if (protocol_features & ~slave_protocol_features) {
		VHOST_LOG_CONFIG(ERR,
			"(%d) received invalid protocol features.\n",
			dev->vid);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	dev->protocol_features = protocol_features;
	VHOST_LOG_CONFIG(INFO,
		"negotiated Vhost-user protocol features: 0x%" PRIx64 "\n",
		dev->protocol_features);

	return RTE_VHOST_MSG_RESULT_OK;
}

static int
vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	int fd = msg->fds[0];
	uint64_t size, off;
	void *addr;
	uint32_t i;

	if (validate_msg_fds(msg, 1) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	if (fd < 0) {
		VHOST_LOG_CONFIG(ERR, "invalid log fd: %d\n", fd);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	if (msg->size != sizeof(VhostUserLog)) {
		VHOST_LOG_CONFIG(ERR,
			"invalid log base msg size: %"PRIu32" != %d\n",
			msg->size, (int)sizeof(VhostUserLog));
		goto close_msg_fds;
	}

	size = msg->payload.log.mmap_size;
	off = msg->payload.log.mmap_offset;

	/*
	 * Check for mmap size and offset overflow: in unsigned arithmetic,
	 * off + size wraps around exactly when off >= -size (i.e. when
	 * off > UINT64_MAX - size).
	 */
	if (off >= -size) {
		VHOST_LOG_CONFIG(ERR,
			"log offset %#"PRIx64" and log size %#"PRIx64" overflow\n",
			off, size);
		goto close_msg_fds;
	}

	VHOST_LOG_CONFIG(INFO,
		"log mmap size: %"PRIu64", offset: %"PRIu64"\n",
		size, off);

	/*
	 * mmap from 0 to work around a hugepage mmap bug: mmap will
	 * fail when the offset is not page-size aligned.
	 */
	addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);
	if (addr == MAP_FAILED) {
		VHOST_LOG_CONFIG(ERR, "mmap log base failed!\n");
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	/*
	 * Free any previously mapped log memory, in case
	 * VHOST_USER_SET_LOG_BASE is received more than once.
	 */
	if (dev->log_addr) {
		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
	}
	dev->log_addr = (uint64_t)(uintptr_t)addr;
	dev->log_base = dev->log_addr + off;
	dev->log_size = size;

	for (i = 0; i < dev->nr_vring; i++) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		rte_free(vq->log_cache);
		vq->log_cache = NULL;
		vq->log_cache_nb_elem = 0;
		vq->log_cache = rte_zmalloc("vq log cache",
				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
				0);
		/*
		 * If the log cache allocation fails, don't fail migration;
		 * logging simply won't be cached, which will impact
		 * performance.
		 */
		if (!vq->log_cache)
			VHOST_LOG_CONFIG(ERR, "Failed to allocate VQ logging cache\n");
	}

	/*
	 * The spec is not clear about it (yet), but QEMU doesn't expect
	 * any payload in the reply.
	 */
	msg->size = 0;
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;

close_msg_fds:
	close_msg_fds(msg);
	return RTE_VHOST_MSG_RESULT_ERR;
}

static int vhost_user_set_log_fd(struct virtio_net **pdev __rte_unused,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	if (validate_msg_fds(msg, 1) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	close(msg->fds[0]);
	VHOST_LOG_CONFIG(INFO, "not implemented.\n");

	return RTE_VHOST_MSG_RESULT_OK;
}

/*
 * A RARP packet is constructed and broadcast to notify switches about
 * the new location of the migrated VM, so that packets from the outside
 * are not lost after migration.
 *
 * However, we don't actually "send" the RARP packet here; instead, we set
 * the 'broadcast_rarp' flag to let rte_vhost_dequeue_burst() inject it.
 */
static int
vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint8_t *mac = (uint8_t *)&msg->payload.u64;
	struct rte_vdpa_device *vdpa_dev;

	if (validate_msg_fds(msg, 0) != 0)
		return RTE_VHOST_MSG_RESULT_ERR;

	VHOST_LOG_CONFIG(DEBUG,
		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	memcpy(dev->mac.addr_bytes, mac, 6);

	/*
	 * Set the flag to inject a RARP broadcast packet at
	 * rte_vhost_dequeue_burst().
	 *
	 * __ATOMIC_RELEASE ordering is for making sure the MAC address is
	 * copied before the flag is set.
2244 */ 2245 __atomic_store_n(&dev->broadcast_rarp, 1, __ATOMIC_RELEASE); 2246 vdpa_dev = dev->vdpa_dev; 2247 if (vdpa_dev && vdpa_dev->ops->migration_done) 2248 vdpa_dev->ops->migration_done(dev->vid); 2249 2250 return RTE_VHOST_MSG_RESULT_OK; 2251 } 2252 2253 static int 2254 vhost_user_net_set_mtu(struct virtio_net **pdev, struct VhostUserMsg *msg, 2255 int main_fd __rte_unused) 2256 { 2257 struct virtio_net *dev = *pdev; 2258 2259 if (validate_msg_fds(msg, 0) != 0) 2260 return RTE_VHOST_MSG_RESULT_ERR; 2261 2262 if (msg->payload.u64 < VIRTIO_MIN_MTU || 2263 msg->payload.u64 > VIRTIO_MAX_MTU) { 2264 VHOST_LOG_CONFIG(ERR, "Invalid MTU size (%"PRIu64")\n", 2265 msg->payload.u64); 2266 2267 return RTE_VHOST_MSG_RESULT_ERR; 2268 } 2269 2270 dev->mtu = msg->payload.u64; 2271 2272 return RTE_VHOST_MSG_RESULT_OK; 2273 } 2274 2275 static int 2276 vhost_user_set_req_fd(struct virtio_net **pdev, struct VhostUserMsg *msg, 2277 int main_fd __rte_unused) 2278 { 2279 struct virtio_net *dev = *pdev; 2280 int fd = msg->fds[0]; 2281 2282 if (validate_msg_fds(msg, 1) != 0) 2283 return RTE_VHOST_MSG_RESULT_ERR; 2284 2285 if (fd < 0) { 2286 VHOST_LOG_CONFIG(ERR, 2287 "Invalid file descriptor for slave channel (%d)\n", 2288 fd); 2289 return RTE_VHOST_MSG_RESULT_ERR; 2290 } 2291 2292 if (dev->slave_req_fd >= 0) 2293 close(dev->slave_req_fd); 2294 2295 dev->slave_req_fd = fd; 2296 2297 return RTE_VHOST_MSG_RESULT_OK; 2298 } 2299 2300 static int 2301 is_vring_iotlb_split(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg) 2302 { 2303 struct vhost_vring_addr *ra; 2304 uint64_t start, end, len; 2305 2306 start = imsg->iova; 2307 end = start + imsg->size; 2308 2309 ra = &vq->ring_addrs; 2310 len = sizeof(struct vring_desc) * vq->size; 2311 if (ra->desc_user_addr < end && (ra->desc_user_addr + len) > start) 2312 return 1; 2313 2314 len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; 2315 if (ra->avail_user_addr < end && (ra->avail_user_addr + len) > start) 2316 return 1; 2317 2318 len = sizeof(struct vring_used) + 2319 sizeof(struct vring_used_elem) * vq->size; 2320 if (ra->used_user_addr < end && (ra->used_user_addr + len) > start) 2321 return 1; 2322 2323 if (ra->flags & (1 << VHOST_VRING_F_LOG)) { 2324 len = sizeof(uint64_t); 2325 if (ra->log_guest_addr < end && 2326 (ra->log_guest_addr + len) > start) 2327 return 1; 2328 } 2329 2330 return 0; 2331 } 2332 2333 static int 2334 is_vring_iotlb_packed(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg) 2335 { 2336 struct vhost_vring_addr *ra; 2337 uint64_t start, end, len; 2338 2339 start = imsg->iova; 2340 end = start + imsg->size; 2341 2342 ra = &vq->ring_addrs; 2343 len = sizeof(struct vring_packed_desc) * vq->size; 2344 if (ra->desc_user_addr < end && (ra->desc_user_addr + len) > start) 2345 return 1; 2346 2347 len = sizeof(struct vring_packed_desc_event); 2348 if (ra->avail_user_addr < end && (ra->avail_user_addr + len) > start) 2349 return 1; 2350 2351 len = sizeof(struct vring_packed_desc_event); 2352 if (ra->used_user_addr < end && (ra->used_user_addr + len) > start) 2353 return 1; 2354 2355 if (ra->flags & (1 << VHOST_VRING_F_LOG)) { 2356 len = sizeof(uint64_t); 2357 if (ra->log_guest_addr < end && 2358 (ra->log_guest_addr + len) > start) 2359 return 1; 2360 } 2361 2362 return 0; 2363 } 2364 2365 static int is_vring_iotlb(struct virtio_net *dev, 2366 struct vhost_virtqueue *vq, 2367 struct vhost_iotlb_msg *imsg) 2368 { 2369 if (vq_is_packed(dev)) 2370 return is_vring_iotlb_packed(vq, imsg); 2371 else 2372 return 
is_vring_iotlb_split(vq, imsg); 2373 } 2374 2375 static int 2376 vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg, 2377 int main_fd __rte_unused) 2378 { 2379 struct virtio_net *dev = *pdev; 2380 struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; 2381 uint16_t i; 2382 uint64_t vva, len; 2383 2384 if (validate_msg_fds(msg, 0) != 0) 2385 return RTE_VHOST_MSG_RESULT_ERR; 2386 2387 switch (imsg->type) { 2388 case VHOST_IOTLB_UPDATE: 2389 len = imsg->size; 2390 vva = qva_to_vva(dev, imsg->uaddr, &len); 2391 if (!vva) 2392 return RTE_VHOST_MSG_RESULT_ERR; 2393 2394 for (i = 0; i < dev->nr_vring; i++) { 2395 struct vhost_virtqueue *vq = dev->virtqueue[i]; 2396 2397 if (!vq) 2398 continue; 2399 2400 vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, 2401 len, imsg->perm); 2402 2403 if (is_vring_iotlb(dev, vq, imsg)) 2404 *pdev = dev = translate_ring_addresses(dev, i); 2405 } 2406 break; 2407 case VHOST_IOTLB_INVALIDATE: 2408 for (i = 0; i < dev->nr_vring; i++) { 2409 struct vhost_virtqueue *vq = dev->virtqueue[i]; 2410 2411 if (!vq) 2412 continue; 2413 2414 vhost_user_iotlb_cache_remove(vq, imsg->iova, 2415 imsg->size); 2416 2417 if (is_vring_iotlb(dev, vq, imsg)) 2418 vring_invalidate(dev, vq); 2419 } 2420 break; 2421 default: 2422 VHOST_LOG_CONFIG(ERR, "Invalid IOTLB message type (%d)\n", 2423 imsg->type); 2424 return RTE_VHOST_MSG_RESULT_ERR; 2425 } 2426 2427 return RTE_VHOST_MSG_RESULT_OK; 2428 } 2429 2430 static int 2431 vhost_user_set_postcopy_advise(struct virtio_net **pdev, 2432 struct VhostUserMsg *msg, 2433 int main_fd __rte_unused) 2434 { 2435 struct virtio_net *dev = *pdev; 2436 #ifdef RTE_LIBRTE_VHOST_POSTCOPY 2437 struct uffdio_api api_struct; 2438 2439 if (validate_msg_fds(msg, 0) != 0) 2440 return RTE_VHOST_MSG_RESULT_ERR; 2441 2442 dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 2443 2444 if (dev->postcopy_ufd == -1) { 2445 VHOST_LOG_CONFIG(ERR, "Userfaultfd not available: %s\n", 2446 strerror(errno)); 2447 return RTE_VHOST_MSG_RESULT_ERR; 2448 } 2449 api_struct.api = UFFD_API; 2450 api_struct.features = 0; 2451 if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) { 2452 VHOST_LOG_CONFIG(ERR, "UFFDIO_API ioctl failure: %s\n", 2453 strerror(errno)); 2454 close(dev->postcopy_ufd); 2455 dev->postcopy_ufd = -1; 2456 return RTE_VHOST_MSG_RESULT_ERR; 2457 } 2458 msg->fds[0] = dev->postcopy_ufd; 2459 msg->fd_num = 1; 2460 2461 return RTE_VHOST_MSG_RESULT_REPLY; 2462 #else 2463 dev->postcopy_ufd = -1; 2464 msg->fd_num = 0; 2465 2466 return RTE_VHOST_MSG_RESULT_ERR; 2467 #endif 2468 } 2469 2470 static int 2471 vhost_user_set_postcopy_listen(struct virtio_net **pdev, 2472 struct VhostUserMsg *msg __rte_unused, 2473 int main_fd __rte_unused) 2474 { 2475 struct virtio_net *dev = *pdev; 2476 2477 if (validate_msg_fds(msg, 0) != 0) 2478 return RTE_VHOST_MSG_RESULT_ERR; 2479 2480 if (dev->mem && dev->mem->nregions) { 2481 VHOST_LOG_CONFIG(ERR, 2482 "Regions already registered at postcopy-listen\n"); 2483 return RTE_VHOST_MSG_RESULT_ERR; 2484 } 2485 dev->postcopy_listening = 1; 2486 2487 return RTE_VHOST_MSG_RESULT_OK; 2488 } 2489 2490 static int 2491 vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg, 2492 int main_fd __rte_unused) 2493 { 2494 struct virtio_net *dev = *pdev; 2495 2496 if (validate_msg_fds(msg, 0) != 0) 2497 return RTE_VHOST_MSG_RESULT_ERR; 2498 2499 dev->postcopy_listening = 0; 2500 if (dev->postcopy_ufd >= 0) { 2501 close(dev->postcopy_ufd); 2502 dev->postcopy_ufd = -1; 2503 } 2504 2505 msg->payload.u64 = 
0; 2506 msg->size = sizeof(msg->payload.u64); 2507 msg->fd_num = 0; 2508 2509 return RTE_VHOST_MSG_RESULT_REPLY; 2510 } 2511 2512 static int 2513 vhost_user_get_status(struct virtio_net **pdev, struct VhostUserMsg *msg, 2514 int main_fd __rte_unused) 2515 { 2516 struct virtio_net *dev = *pdev; 2517 2518 if (validate_msg_fds(msg, 0) != 0) 2519 return RTE_VHOST_MSG_RESULT_ERR; 2520 2521 msg->payload.u64 = dev->status; 2522 msg->size = sizeof(msg->payload.u64); 2523 msg->fd_num = 0; 2524 2525 return RTE_VHOST_MSG_RESULT_REPLY; 2526 } 2527 2528 static int 2529 vhost_user_set_status(struct virtio_net **pdev, struct VhostUserMsg *msg, 2530 int main_fd __rte_unused) 2531 { 2532 struct virtio_net *dev = *pdev; 2533 2534 if (validate_msg_fds(msg, 0) != 0) 2535 return RTE_VHOST_MSG_RESULT_ERR; 2536 2537 /* As per Virtio specification, the device status is 8bits long */ 2538 if (msg->payload.u64 > UINT8_MAX) { 2539 VHOST_LOG_CONFIG(ERR, "Invalid VHOST_USER_SET_STATUS payload 0x%" PRIx64 "\n", 2540 msg->payload.u64); 2541 return RTE_VHOST_MSG_RESULT_ERR; 2542 } 2543 2544 dev->status = msg->payload.u64; 2545 2546 if ((dev->status & VIRTIO_DEVICE_STATUS_FEATURES_OK) && 2547 (dev->flags & VIRTIO_DEV_FEATURES_FAILED)) { 2548 VHOST_LOG_CONFIG(ERR, "FEATURES_OK bit is set but feature negotiation failed\n"); 2549 /* 2550 * Clear the bit to let the driver know about the feature 2551 * negotiation failure 2552 */ 2553 dev->status &= ~VIRTIO_DEVICE_STATUS_FEATURES_OK; 2554 } 2555 2556 VHOST_LOG_CONFIG(INFO, "New device status(0x%08x):\n" 2557 "\t-RESET: %u\n" 2558 "\t-ACKNOWLEDGE: %u\n" 2559 "\t-DRIVER: %u\n" 2560 "\t-FEATURES_OK: %u\n" 2561 "\t-DRIVER_OK: %u\n" 2562 "\t-DEVICE_NEED_RESET: %u\n" 2563 "\t-FAILED: %u\n", 2564 dev->status, 2565 (dev->status == VIRTIO_DEVICE_STATUS_RESET), 2566 !!(dev->status & VIRTIO_DEVICE_STATUS_ACK), 2567 !!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER), 2568 !!(dev->status & VIRTIO_DEVICE_STATUS_FEATURES_OK), 2569 !!(dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK), 2570 !!(dev->status & VIRTIO_DEVICE_STATUS_DEV_NEED_RESET), 2571 !!(dev->status & VIRTIO_DEVICE_STATUS_FAILED)); 2572 2573 return RTE_VHOST_MSG_RESULT_OK; 2574 } 2575 2576 typedef int (*vhost_message_handler_t)(struct virtio_net **pdev, 2577 struct VhostUserMsg *msg, 2578 int main_fd); 2579 static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { 2580 [VHOST_USER_NONE] = NULL, 2581 [VHOST_USER_GET_FEATURES] = vhost_user_get_features, 2582 [VHOST_USER_SET_FEATURES] = vhost_user_set_features, 2583 [VHOST_USER_SET_OWNER] = vhost_user_set_owner, 2584 [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner, 2585 [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table, 2586 [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base, 2587 [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd, 2588 [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num, 2589 [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr, 2590 [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base, 2591 [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base, 2592 [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick, 2593 [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call, 2594 [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err, 2595 [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features, 2596 [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features, 2597 [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num, 2598 [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable, 2599 [VHOST_USER_SEND_RARP] = 
vhost_user_send_rarp, 2600 [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu, 2601 [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd, 2602 [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg, 2603 [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, 2604 [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, 2605 [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, 2606 [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, 2607 [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, 2608 [VHOST_USER_SET_STATUS] = vhost_user_set_status, 2609 [VHOST_USER_GET_STATUS] = vhost_user_get_status, 2610 }; 2611 2612 /* return bytes# of read on success or negative val on failure. */ 2613 static int 2614 read_vhost_message(int sockfd, struct VhostUserMsg *msg) 2615 { 2616 int ret; 2617 2618 ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, 2619 msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num); 2620 if (ret <= 0) { 2621 return ret; 2622 } else if (ret != VHOST_USER_HDR_SIZE) { 2623 VHOST_LOG_CONFIG(ERR, "Unexpected header size read\n"); 2624 close_msg_fds(msg); 2625 return -1; 2626 } 2627 2628 if (msg->size) { 2629 if (msg->size > sizeof(msg->payload)) { 2630 VHOST_LOG_CONFIG(ERR, 2631 "invalid msg size: %d\n", msg->size); 2632 return -1; 2633 } 2634 ret = read(sockfd, &msg->payload, msg->size); 2635 if (ret <= 0) 2636 return ret; 2637 if (ret != (int)msg->size) { 2638 VHOST_LOG_CONFIG(ERR, 2639 "read control message failed\n"); 2640 return -1; 2641 } 2642 } 2643 2644 return ret; 2645 } 2646 2647 static int 2648 send_vhost_message(int sockfd, struct VhostUserMsg *msg) 2649 { 2650 if (!msg) 2651 return 0; 2652 2653 return send_fd_message(sockfd, (char *)msg, 2654 VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num); 2655 } 2656 2657 static int 2658 send_vhost_reply(int sockfd, struct VhostUserMsg *msg) 2659 { 2660 if (!msg) 2661 return 0; 2662 2663 msg->flags &= ~VHOST_USER_VERSION_MASK; 2664 msg->flags &= ~VHOST_USER_NEED_REPLY; 2665 msg->flags |= VHOST_USER_VERSION; 2666 msg->flags |= VHOST_USER_REPLY_MASK; 2667 2668 return send_vhost_message(sockfd, msg); 2669 } 2670 2671 static int 2672 send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg) 2673 { 2674 int ret; 2675 2676 if (msg->flags & VHOST_USER_NEED_REPLY) 2677 rte_spinlock_lock(&dev->slave_req_lock); 2678 2679 ret = send_vhost_message(dev->slave_req_fd, msg); 2680 if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY)) 2681 rte_spinlock_unlock(&dev->slave_req_lock); 2682 2683 return ret; 2684 } 2685 2686 /* 2687 * Allocate a queue pair if it hasn't been allocated yet 2688 */ 2689 static int 2690 vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, 2691 struct VhostUserMsg *msg) 2692 { 2693 uint32_t vring_idx; 2694 2695 switch (msg->request.master) { 2696 case VHOST_USER_SET_VRING_KICK: 2697 case VHOST_USER_SET_VRING_CALL: 2698 case VHOST_USER_SET_VRING_ERR: 2699 vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; 2700 break; 2701 case VHOST_USER_SET_VRING_NUM: 2702 case VHOST_USER_SET_VRING_BASE: 2703 case VHOST_USER_SET_VRING_ENABLE: 2704 vring_idx = msg->payload.state.index; 2705 break; 2706 case VHOST_USER_SET_VRING_ADDR: 2707 vring_idx = msg->payload.addr.index; 2708 break; 2709 default: 2710 return 0; 2711 } 2712 2713 if (vring_idx >= VHOST_MAX_VRING) { 2714 VHOST_LOG_CONFIG(ERR, 2715 "invalid vring index: %u\n", vring_idx); 2716 return -1; 2717 } 2718 2719 if (dev->virtqueue[vring_idx]) 2720 return 0; 2721 2722 return alloc_vring_queue(dev, vring_idx); 2723 } 2724 
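/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * read_vhost_message() above relies on the vhost-user framing of a fixed
 * header (request, flags and payload size, see VHOST_USER_HDR_SIZE)
 * followed by "size" bytes of payload. The helper below is a hypothetical
 * restatement of the bound check applied before that payload is read.
 */
static inline int
check_msg_payload_size_example(const struct VhostUserMsg *msg)
{
	/* Never read more payload than the VhostUserMsg union can hold. */
	return msg->size <= sizeof(msg->payload) ? 0 : -1;
}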
static void
vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
{
	unsigned int i = 0;
	unsigned int vq_num = 0;

	while (vq_num < dev->nr_vring) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq) {
			rte_spinlock_lock(&vq->access_lock);
			vq_num++;
		}
		i++;
	}
}

static void
vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
{
	unsigned int i = 0;
	unsigned int vq_num = 0;

	while (vq_num < dev->nr_vring) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq) {
			rte_spinlock_unlock(&vq->access_lock);
			vq_num++;
		}
		i++;
	}
}
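/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * vhost_user_msg_handler() below gives external backends a chance to
 * intercept requests through the pre_msg_handle/post_msg_handle hooks of
 * struct rte_vhost_user_extern_ops, registered with
 * rte_vhost_extern_callback_register(). The hook below is a hypothetical
 * example that simply declines to handle anything, leaving the request to
 * the generic handlers in this file.
 */
static enum rte_vhost_msg_result __rte_unused
example_pre_msg_handle(int vid __rte_unused, void *msg __rte_unused)
{
	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}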
int
vhost_user_msg_handler(int vid, int fd)
{
	struct virtio_net *dev;
	struct VhostUserMsg msg;
	struct rte_vdpa_device *vdpa_dev;
	int ret;
	int unlock_required = 0;
	bool handled;
	int request;
	uint32_t i;

	dev = get_device(vid);
	if (dev == NULL)
		return -1;

	if (!dev->notify_ops) {
		dev->notify_ops = vhost_driver_callback_get(dev->ifname);
		if (!dev->notify_ops) {
			VHOST_LOG_CONFIG(ERR,
				"failed to get callback ops for driver %s\n",
				dev->ifname);
			return -1;
		}
	}

	ret = read_vhost_message(fd, &msg);
	if (ret <= 0) {
		if (ret < 0)
			VHOST_LOG_CONFIG(ERR,
				"vhost read message failed\n");
		else
			VHOST_LOG_CONFIG(INFO,
				"vhost peer closed\n");

		return -1;
	}

	ret = 0;
	request = msg.request.master;
	if (request > VHOST_USER_NONE && request < VHOST_USER_MAX &&
			vhost_message_str[request]) {
		if (request != VHOST_USER_IOTLB_MSG)
			VHOST_LOG_CONFIG(INFO, "read message %s\n",
				vhost_message_str[request]);
		else
			VHOST_LOG_CONFIG(DEBUG, "read message %s\n",
				vhost_message_str[request]);
	} else {
		VHOST_LOG_CONFIG(DEBUG, "External request %d\n", request);
	}

	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to alloc queue\n");
		return -1;
	}

	/*
	 * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE
	 * and VHOST_USER_RESET_OWNER, since they are sent when virtio stops
	 * and the device is destroyed. destroy_device() waits for queues to
	 * be inactive, so it is safe. Otherwise taking the access_lock
	 * would cause a deadlock.
	 */
	switch (request) {
	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_PROTOCOL_FEATURES:
	case VHOST_USER_SET_OWNER:
	case VHOST_USER_SET_MEM_TABLE:
	case VHOST_USER_SET_LOG_BASE:
	case VHOST_USER_SET_LOG_FD:
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_ADDR:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
	case VHOST_USER_SET_VRING_ENABLE:
	case VHOST_USER_SEND_RARP:
	case VHOST_USER_NET_SET_MTU:
	case VHOST_USER_SET_SLAVE_REQ_FD:
		if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
			vhost_user_lock_all_queue_pairs(dev);
			unlock_required = 1;
		}
		break;
	default:
		break;
	}

	handled = false;
	if (dev->extern_ops.pre_msg_handle) {
		ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
				(void *)&msg);
		switch (ret) {
		case RTE_VHOST_MSG_RESULT_REPLY:
			send_vhost_reply(fd, &msg);
			/* Fall-through */
		case RTE_VHOST_MSG_RESULT_ERR:
		case RTE_VHOST_MSG_RESULT_OK:
			handled = true;
			goto skip_to_post_handle;
		case RTE_VHOST_MSG_RESULT_NOT_HANDLED:
		default:
			break;
		}
	}

	if (request > VHOST_USER_NONE && request < VHOST_USER_MAX) {
		if (!vhost_message_handlers[request])
			goto skip_to_post_handle;
		ret = vhost_message_handlers[request](&dev, &msg, fd);

		switch (ret) {
		case RTE_VHOST_MSG_RESULT_ERR:
			VHOST_LOG_CONFIG(ERR,
				"Processing %s failed.\n",
				vhost_message_str[request]);
			handled = true;
			break;
		case RTE_VHOST_MSG_RESULT_OK:
			VHOST_LOG_CONFIG(DEBUG,
				"Processing %s succeeded.\n",
				vhost_message_str[request]);
			handled = true;
			break;
		case RTE_VHOST_MSG_RESULT_REPLY:
			VHOST_LOG_CONFIG(DEBUG,
				"Processing %s succeeded and needs reply.\n",
				vhost_message_str[request]);
			send_vhost_reply(fd, &msg);
			handled = true;
			break;
		default:
			break;
		}
	}

skip_to_post_handle:
	if (ret != RTE_VHOST_MSG_RESULT_ERR &&
			dev->extern_ops.post_msg_handle) {
		ret = (*dev->extern_ops.post_msg_handle)(dev->vid,
				(void *)&msg);
		switch (ret) {
		case RTE_VHOST_MSG_RESULT_REPLY:
			send_vhost_reply(fd, &msg);
			/* Fall-through */
		case RTE_VHOST_MSG_RESULT_ERR:
		case RTE_VHOST_MSG_RESULT_OK:
			handled = true;
			/* Fall-through */
		case RTE_VHOST_MSG_RESULT_NOT_HANDLED:
		default:
			break;
		}
	}

	if (unlock_required)
		vhost_user_unlock_all_queue_pairs(dev);

	/* If message was not handled at this stage, treat it as an error */
	if (!handled) {
		VHOST_LOG_CONFIG(ERR,
			"vhost message (req: %d) was not handled.\n", request);
		close_msg_fds(&msg);
		ret = RTE_VHOST_MSG_RESULT_ERR;
	}

	/*
	 * If the request required a reply that was already sent,
	 * this optional reply-ack won't be sent as the
	 * VHOST_USER_NEED_REPLY was cleared in send_vhost_reply().
2933 */ 2934 if (msg.flags & VHOST_USER_NEED_REPLY) { 2935 msg.payload.u64 = ret == RTE_VHOST_MSG_RESULT_ERR; 2936 msg.size = sizeof(msg.payload.u64); 2937 msg.fd_num = 0; 2938 send_vhost_reply(fd, &msg); 2939 } else if (ret == RTE_VHOST_MSG_RESULT_ERR) { 2940 VHOST_LOG_CONFIG(ERR, 2941 "vhost message handling failed.\n"); 2942 return -1; 2943 } 2944 2945 for (i = 0; i < dev->nr_vring; i++) { 2946 struct vhost_virtqueue *vq = dev->virtqueue[i]; 2947 bool cur_ready = vq_is_ready(dev, vq); 2948 2949 if (cur_ready != (vq && vq->ready)) { 2950 vq->ready = cur_ready; 2951 vhost_user_notify_queue_state(dev, i, cur_ready); 2952 } 2953 } 2954 2955 2956 if (!virtio_is_ready(dev)) 2957 goto out; 2958 2959 /* 2960 * Virtio is now ready. If not done already, it is time 2961 * to notify the application it can process the rings and 2962 * configure the vDPA device if present. 2963 */ 2964 2965 if (!(dev->flags & VIRTIO_DEV_RUNNING)) { 2966 if (dev->notify_ops->new_device(dev->vid) == 0) 2967 dev->flags |= VIRTIO_DEV_RUNNING; 2968 } 2969 2970 vdpa_dev = dev->vdpa_dev; 2971 if (!vdpa_dev) 2972 goto out; 2973 2974 if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { 2975 if (vdpa_dev->ops->dev_conf(dev->vid)) 2976 VHOST_LOG_CONFIG(ERR, 2977 "Failed to configure vDPA device\n"); 2978 else 2979 dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; 2980 } 2981 2982 out: 2983 return 0; 2984 } 2985 2986 static int process_slave_message_reply(struct virtio_net *dev, 2987 const struct VhostUserMsg *msg) 2988 { 2989 struct VhostUserMsg msg_reply; 2990 int ret; 2991 2992 if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) 2993 return 0; 2994 2995 ret = read_vhost_message(dev->slave_req_fd, &msg_reply); 2996 if (ret <= 0) { 2997 if (ret < 0) 2998 VHOST_LOG_CONFIG(ERR, 2999 "vhost read slave message reply failed\n"); 3000 else 3001 VHOST_LOG_CONFIG(INFO, 3002 "vhost peer closed\n"); 3003 ret = -1; 3004 goto out; 3005 } 3006 3007 ret = 0; 3008 if (msg_reply.request.slave != msg->request.slave) { 3009 VHOST_LOG_CONFIG(ERR, 3010 "Received unexpected msg type (%u), expected %u\n", 3011 msg_reply.request.slave, msg->request.slave); 3012 ret = -1; 3013 goto out; 3014 } 3015 3016 ret = msg_reply.payload.u64 ? 
-1 : 0; 3017 3018 out: 3019 rte_spinlock_unlock(&dev->slave_req_lock); 3020 return ret; 3021 } 3022 3023 int 3024 vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) 3025 { 3026 int ret; 3027 struct VhostUserMsg msg = { 3028 .request.slave = VHOST_USER_SLAVE_IOTLB_MSG, 3029 .flags = VHOST_USER_VERSION, 3030 .size = sizeof(msg.payload.iotlb), 3031 .payload.iotlb = { 3032 .iova = iova, 3033 .perm = perm, 3034 .type = VHOST_IOTLB_MISS, 3035 }, 3036 }; 3037 3038 ret = send_vhost_message(dev->slave_req_fd, &msg); 3039 if (ret < 0) { 3040 VHOST_LOG_CONFIG(ERR, 3041 "Failed to send IOTLB miss message (%d)\n", 3042 ret); 3043 return ret; 3044 } 3045 3046 return 0; 3047 } 3048 3049 static int 3050 vhost_user_slave_config_change(struct virtio_net *dev, bool need_reply) 3051 { 3052 int ret; 3053 struct VhostUserMsg msg = { 3054 .request.slave = VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, 3055 .flags = VHOST_USER_VERSION, 3056 .size = 0, 3057 }; 3058 3059 if (need_reply) 3060 msg.flags |= VHOST_USER_NEED_REPLY; 3061 3062 ret = send_vhost_slave_message(dev, &msg); 3063 if (ret < 0) { 3064 VHOST_LOG_CONFIG(ERR, 3065 "Failed to send config change (%d)\n", 3066 ret); 3067 return ret; 3068 } 3069 3070 return process_slave_message_reply(dev, &msg); 3071 } 3072 3073 int 3074 rte_vhost_slave_config_change(int vid, bool need_reply) 3075 { 3076 struct virtio_net *dev; 3077 3078 dev = get_device(vid); 3079 if (!dev) 3080 return -ENODEV; 3081 3082 return vhost_user_slave_config_change(dev, need_reply); 3083 } 3084 3085 static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, 3086 int index, int fd, 3087 uint64_t offset, 3088 uint64_t size) 3089 { 3090 int ret; 3091 struct VhostUserMsg msg = { 3092 .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, 3093 .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY, 3094 .size = sizeof(msg.payload.area), 3095 .payload.area = { 3096 .u64 = index & VHOST_USER_VRING_IDX_MASK, 3097 .size = size, 3098 .offset = offset, 3099 }, 3100 }; 3101 3102 if (fd < 0) 3103 msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; 3104 else { 3105 msg.fds[0] = fd; 3106 msg.fd_num = 1; 3107 } 3108 3109 ret = send_vhost_slave_message(dev, &msg); 3110 if (ret < 0) { 3111 VHOST_LOG_CONFIG(ERR, 3112 "Failed to set host notifier (%d)\n", ret); 3113 return ret; 3114 } 3115 3116 return process_slave_message_reply(dev, &msg); 3117 } 3118 3119 int rte_vhost_host_notifier_ctrl(int vid, uint16_t qid, bool enable) 3120 { 3121 struct virtio_net *dev; 3122 struct rte_vdpa_device *vdpa_dev; 3123 int vfio_device_fd, ret = 0; 3124 uint64_t offset, size; 3125 unsigned int i, q_start, q_last; 3126 3127 dev = get_device(vid); 3128 if (!dev) 3129 return -ENODEV; 3130 3131 vdpa_dev = dev->vdpa_dev; 3132 if (vdpa_dev == NULL) 3133 return -ENODEV; 3134 3135 if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) || 3136 !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) || 3137 !(dev->protocol_features & 3138 (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) || 3139 !(dev->protocol_features & 3140 (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) || 3141 !(dev->protocol_features & 3142 (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))) 3143 return -ENOTSUP; 3144 3145 if (qid == RTE_VHOST_QUEUE_ALL) { 3146 q_start = 0; 3147 q_last = dev->nr_vring - 1; 3148 } else { 3149 if (qid >= dev->nr_vring) 3150 return -EINVAL; 3151 q_start = qid; 3152 q_last = qid; 3153 } 3154 3155 RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP); 3156 
RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP); 3157 3158 vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid); 3159 if (vfio_device_fd < 0) 3160 return -ENOTSUP; 3161 3162 if (enable) { 3163 for (i = q_start; i <= q_last; i++) { 3164 if (vdpa_dev->ops->get_notify_area(vid, i, &offset, 3165 &size) < 0) { 3166 ret = -ENOTSUP; 3167 goto disable; 3168 } 3169 3170 if (vhost_user_slave_set_vring_host_notifier(dev, i, 3171 vfio_device_fd, offset, size) < 0) { 3172 ret = -EFAULT; 3173 goto disable; 3174 } 3175 } 3176 } else { 3177 disable: 3178 for (i = q_start; i <= q_last; i++) { 3179 vhost_user_slave_set_vring_host_notifier(dev, i, -1, 3180 0, 0); 3181 } 3182 } 3183 3184 return ret; 3185 } 3186
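/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a vDPA driver would typically call rte_vhost_host_notifier_ctrl() from
 * its dev_conf callback once the hardware queues are set up, so that guest
 * notifications are relayed directly to the device. The function below is
 * a hypothetical example, not taken from a real driver; on failure it just
 * logs and lets the software relay path be used instead.
 */
static int __rte_unused
example_vdpa_dev_conf(int vid)
{
	/* Try to map host notifier areas for every queue of this device. */
	if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
		VHOST_LOG_CONFIG(INFO,
			"host notifier not available, using SW notification relay\n");

	return 0;
}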