1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdio.h> 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 #include <string.h> 11 #include <sys/socket.h> 12 #include <sys/un.h> 13 #include <sys/queue.h> 14 #include <errno.h> 15 #include <fcntl.h> 16 17 #include <rte_thread.h> 18 #include <rte_log.h> 19 20 #include "fd_man.h" 21 #include "vduse.h" 22 #include "vhost.h" 23 #include "vhost_user.h" 24 25 26 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); 27 28 /* 29 * Every time rte_vhost_driver_register() is invoked, an associated 30 * vhost_user_socket struct will be created. 31 */ 32 struct vhost_user_socket { 33 struct vhost_user_connection_list conn_list; 34 pthread_mutex_t conn_mutex; 35 char *path; 36 int socket_fd; 37 struct sockaddr_un un; 38 bool is_server; 39 bool is_vduse; 40 bool reconnect; 41 bool iommu_support; 42 bool use_builtin_virtio_net; 43 bool extbuf; 44 bool linearbuf; 45 bool async_copy; 46 bool net_compliant_ol_flags; 47 bool stats_enabled; 48 bool async_connect; 49 50 /* 51 * The "supported_features" indicates the feature bits the 52 * vhost driver supports. The "features" indicates the feature 53 * bits after the rte_vhost_driver_features_disable/enable(). 54 * It is also the final feature bits used for vhost-user 55 * features negotiation. 56 */ 57 uint64_t supported_features; 58 uint64_t features; 59 60 uint64_t protocol_features; 61 62 uint32_t max_queue_pairs; 63 64 struct rte_vdpa_device *vdpa_dev; 65 66 struct rte_vhost_device_ops const *notify_ops; 67 }; 68 69 struct vhost_user_connection { 70 struct vhost_user_socket *vsocket; 71 int connfd; 72 int vid; 73 74 TAILQ_ENTRY(vhost_user_connection) next; 75 }; 76 77 #define MAX_VHOST_SOCKET 1024 78 struct vhost_user { 79 struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; 80 struct fdset *fdset; 81 int vsocket_cnt; 82 pthread_mutex_t mutex; 83 }; 84 85 #define MAX_VIRTIO_BACKLOG 128 86 87 static void vhost_user_server_new_connection(int fd, void *data, int *remove); 88 static void vhost_user_read_cb(int fd, void *dat, int *remove); 89 static int create_unix_socket(struct vhost_user_socket *vsocket); 90 static int vhost_user_start_client(struct vhost_user_socket *vsocket); 91 92 static struct vhost_user vhost_user = { 93 .vsocket_cnt = 0, 94 .mutex = PTHREAD_MUTEX_INITIALIZER, 95 }; 96 97 /* 98 * return bytes# of read on success or negative val on failure. Update fdnum 99 * with number of fds read. 100 */ 101 int 102 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds, 103 int *fd_num) 104 { 105 struct iovec iov; 106 struct msghdr msgh; 107 char control[CMSG_SPACE(max_fds * sizeof(int))]; 108 struct cmsghdr *cmsg; 109 int got_fds = 0; 110 int ret; 111 112 *fd_num = 0; 113 114 memset(&msgh, 0, sizeof(msgh)); 115 iov.iov_base = buf; 116 iov.iov_len = buflen; 117 118 msgh.msg_iov = &iov; 119 msgh.msg_iovlen = 1; 120 msgh.msg_control = control; 121 msgh.msg_controllen = sizeof(control); 122 123 ret = recvmsg(sockfd, &msgh, 0); 124 if (ret <= 0) { 125 if (ret) 126 VHOST_CONFIG_LOG(ifname, ERR, "recvmsg failed on fd %d (%s)", 127 sockfd, strerror(errno)); 128 return ret; 129 } 130 131 if (msgh.msg_flags & MSG_TRUNC) 132 VHOST_CONFIG_LOG(ifname, ERR, "truncated msg (fd %d)", sockfd); 133 134 /* MSG_CTRUNC may be caused by LSM misconfiguration */ 135 if (msgh.msg_flags & MSG_CTRUNC) 136 VHOST_CONFIG_LOG(ifname, ERR, "truncated control data (fd %d)", sockfd); 137 138 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 139 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 140 if ((cmsg->cmsg_level == SOL_SOCKET) && 141 (cmsg->cmsg_type == SCM_RIGHTS)) { 142 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 143 *fd_num = got_fds; 144 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); 145 break; 146 } 147 } 148 149 /* Clear out unused file descriptors */ 150 while (got_fds < max_fds) 151 fds[got_fds++] = -1; 152 153 return ret; 154 } 155 156 int 157 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num) 158 { 159 160 struct iovec iov; 161 struct msghdr msgh; 162 size_t fdsize = fd_num * sizeof(int); 163 char control[CMSG_SPACE(fdsize)]; 164 struct cmsghdr *cmsg; 165 int ret; 166 167 memset(&msgh, 0, sizeof(msgh)); 168 iov.iov_base = buf; 169 iov.iov_len = buflen; 170 171 msgh.msg_iov = &iov; 172 msgh.msg_iovlen = 1; 173 174 if (fds && fd_num > 0) { 175 msgh.msg_control = control; 176 msgh.msg_controllen = sizeof(control); 177 cmsg = CMSG_FIRSTHDR(&msgh); 178 if (cmsg == NULL) { 179 VHOST_CONFIG_LOG(ifname, ERR, "cmsg == NULL"); 180 errno = EINVAL; 181 return -1; 182 } 183 cmsg->cmsg_len = CMSG_LEN(fdsize); 184 cmsg->cmsg_level = SOL_SOCKET; 185 cmsg->cmsg_type = SCM_RIGHTS; 186 memcpy(CMSG_DATA(cmsg), fds, fdsize); 187 } else { 188 msgh.msg_control = NULL; 189 msgh.msg_controllen = 0; 190 } 191 192 do { 193 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); 194 } while (ret < 0 && errno == EINTR); 195 196 if (ret < 0) { 197 VHOST_CONFIG_LOG(ifname, ERR, "sendmsg error on fd %d (%s)", 198 sockfd, strerror(errno)); 199 return ret; 200 } 201 202 return ret; 203 } 204 205 static void 206 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) 207 { 208 int vid; 209 size_t size; 210 struct vhost_user_connection *conn; 211 int ret; 212 struct virtio_net *dev; 213 214 if (vsocket == NULL) 215 return; 216 217 conn = malloc(sizeof(*conn)); 218 if (conn == NULL) { 219 close(fd); 220 return; 221 } 222 223 vid = vhost_user_new_device(); 224 if (vid == -1) { 225 goto err; 226 } 227 228 size = strnlen(vsocket->path, PATH_MAX); 229 vhost_set_ifname(vid, vsocket->path, size); 230 231 vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net, 232 vsocket->net_compliant_ol_flags, vsocket->stats_enabled, 233 vsocket->iommu_support); 234 235 vhost_attach_vdpa_device(vid, vsocket->vdpa_dev); 236 237 if (vsocket->extbuf) 238 vhost_enable_extbuf(vid); 239 240 if (vsocket->linearbuf) 241 vhost_enable_linearbuf(vid); 242 243 if (vsocket->async_copy) { 244 dev = get_device(vid); 245 246 if (dev) 247 dev->async_copy = 1; 248 } 249 250 VHOST_CONFIG_LOG(vsocket->path, INFO, "new device, handle is %d", vid); 251 252 if (vsocket->notify_ops->new_connection) { 253 ret = vsocket->notify_ops->new_connection(vid); 254 if (ret < 0) { 255 VHOST_CONFIG_LOG(vsocket->path, ERR, 256 "failed to add vhost user connection with fd %d", 257 fd); 258 goto err_cleanup; 259 } 260 } 261 262 conn->connfd = fd; 263 conn->vsocket = vsocket; 264 conn->vid = vid; 265 ret = fdset_add(vhost_user.fdset, fd, vhost_user_read_cb, 266 NULL, conn); 267 if (ret < 0) { 268 VHOST_CONFIG_LOG(vsocket->path, ERR, 269 "failed to add fd %d into vhost server fdset", 270 fd); 271 272 if (vsocket->notify_ops->destroy_connection) 273 vsocket->notify_ops->destroy_connection(conn->vid); 274 275 goto err_cleanup; 276 } 277 278 pthread_mutex_lock(&vsocket->conn_mutex); 279 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); 280 pthread_mutex_unlock(&vsocket->conn_mutex); 281 282 return; 283 284 err_cleanup: 285 vhost_destroy_device(vid); 286 err: 287 free(conn); 288 close(fd); 289 } 290 291 /* call back when there is new vhost-user connection from client */ 292 static void 293 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) 294 { 295 struct vhost_user_socket *vsocket = dat; 296 297 fd = accept(fd, NULL, NULL); 298 if (fd < 0) 299 return; 300 301 VHOST_CONFIG_LOG(vsocket->path, INFO, "new vhost user connection is %d", fd); 302 vhost_user_add_connection(fd, vsocket); 303 } 304 305 static void 306 vhost_user_read_cb(int connfd, void *dat, int *remove) 307 { 308 struct vhost_user_connection *conn = dat; 309 struct vhost_user_socket *vsocket = conn->vsocket; 310 int ret; 311 312 ret = vhost_user_msg_handler(conn->vid, connfd); 313 if (ret < 0) { 314 struct virtio_net *dev = get_device(conn->vid); 315 316 close(connfd); 317 *remove = 1; 318 319 if (dev) 320 vhost_destroy_device_notify(dev); 321 322 if (vsocket->notify_ops->destroy_connection) 323 vsocket->notify_ops->destroy_connection(conn->vid); 324 325 vhost_destroy_device(conn->vid); 326 327 if (vsocket->reconnect) { 328 create_unix_socket(vsocket); 329 vhost_user_start_client(vsocket); 330 } 331 332 pthread_mutex_lock(&vsocket->conn_mutex); 333 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 334 pthread_mutex_unlock(&vsocket->conn_mutex); 335 336 free(conn); 337 } 338 } 339 340 static int 341 create_unix_socket(struct vhost_user_socket *vsocket) 342 { 343 int fd; 344 struct sockaddr_un *un = &vsocket->un; 345 346 fd = socket(AF_UNIX, SOCK_STREAM, 0); 347 if (fd < 0) 348 return -1; 349 VHOST_CONFIG_LOG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d", 350 vsocket->is_server ? "server" : "client", fd); 351 352 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { 353 VHOST_CONFIG_LOG(vsocket->path, ERR, 354 "vhost-user: can't set nonblocking mode for socket, fd: %d (%s)", 355 fd, strerror(errno)); 356 close(fd); 357 return -1; 358 } 359 360 memset(un, 0, sizeof(*un)); 361 un->sun_family = AF_UNIX; 362 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); 363 un->sun_path[sizeof(un->sun_path) - 1] = '\0'; 364 365 vsocket->socket_fd = fd; 366 return 0; 367 } 368 369 static int 370 vhost_user_start_server(struct vhost_user_socket *vsocket) 371 { 372 int ret; 373 int fd = vsocket->socket_fd; 374 const char *path = vsocket->path; 375 376 /* 377 * bind () may fail if the socket file with the same name already 378 * exists. But the library obviously should not delete the file 379 * provided by the user, since we can not be sure that it is not 380 * being used by other applications. Moreover, many applications form 381 * socket names based on user input, which is prone to errors. 382 * 383 * The user must ensure that the socket does not exist before 384 * registering the vhost driver in server mode. 385 */ 386 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); 387 if (ret < 0) { 388 VHOST_CONFIG_LOG(path, ERR, "failed to bind: %s; remove it and try again", 389 strerror(errno)); 390 goto err; 391 } 392 VHOST_CONFIG_LOG(path, INFO, "binding succeeded"); 393 394 ret = listen(fd, MAX_VIRTIO_BACKLOG); 395 if (ret < 0) 396 goto err; 397 398 ret = fdset_add(vhost_user.fdset, fd, vhost_user_server_new_connection, 399 NULL, vsocket); 400 if (ret < 0) { 401 VHOST_CONFIG_LOG(path, ERR, "failed to add listen fd %d to vhost server fdset", 402 fd); 403 goto err; 404 } 405 406 return 0; 407 408 err: 409 close(fd); 410 return -1; 411 } 412 413 struct vhost_user_reconnect { 414 struct sockaddr_un un; 415 int fd; 416 struct vhost_user_socket *vsocket; 417 418 TAILQ_ENTRY(vhost_user_reconnect) next; 419 }; 420 421 TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); 422 struct vhost_user_reconnect_list { 423 struct vhost_user_reconnect_tailq_list head; 424 pthread_mutex_t mutex; 425 }; 426 427 static struct vhost_user_reconnect_list reconn_list; 428 static rte_thread_t reconn_tid; 429 430 static int 431 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz) 432 { 433 int ret, flags; 434 435 ret = connect(fd, un, sz); 436 if (ret < 0 && errno != EISCONN) 437 return -1; 438 439 flags = fcntl(fd, F_GETFL, 0); 440 if (flags < 0) { 441 VHOST_CONFIG_LOG(path, ERR, "can't get flags for connfd %d (%s)", 442 fd, strerror(errno)); 443 return -2; 444 } 445 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { 446 VHOST_CONFIG_LOG(path, ERR, "can't disable nonblocking on fd %d", fd); 447 return -2; 448 } 449 return 0; 450 } 451 452 static uint32_t 453 vhost_user_client_reconnect(void *arg __rte_unused) 454 { 455 int ret; 456 struct vhost_user_reconnect *reconn, *next; 457 458 while (1) { 459 pthread_mutex_lock(&reconn_list.mutex); 460 461 /* 462 * An equal implementation of TAILQ_FOREACH_SAFE, 463 * which does not exist on all platforms. 464 */ 465 for (reconn = TAILQ_FIRST(&reconn_list.head); 466 reconn != NULL; reconn = next) { 467 next = TAILQ_NEXT(reconn, next); 468 469 ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd, 470 (struct sockaddr *)&reconn->un, 471 sizeof(reconn->un)); 472 if (ret == -2) { 473 close(reconn->fd); 474 VHOST_CONFIG_LOG(reconn->vsocket->path, ERR, 475 "reconnection for fd %d failed", 476 reconn->fd); 477 goto remove_fd; 478 } 479 if (ret == -1) 480 continue; 481 482 VHOST_CONFIG_LOG(reconn->vsocket->path, INFO, "connected"); 483 vhost_user_add_connection(reconn->fd, reconn->vsocket); 484 remove_fd: 485 TAILQ_REMOVE(&reconn_list.head, reconn, next); 486 free(reconn); 487 } 488 489 pthread_mutex_unlock(&reconn_list.mutex); 490 sleep(1); 491 } 492 493 return 0; 494 } 495 496 static int 497 vhost_user_reconnect_init(void) 498 { 499 int ret; 500 501 ret = pthread_mutex_init(&reconn_list.mutex, NULL); 502 if (ret < 0) { 503 VHOST_CONFIG_LOG("thread", ERR, "%s: failed to initialize mutex", __func__); 504 return ret; 505 } 506 TAILQ_INIT(&reconn_list.head); 507 508 ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco", 509 vhost_user_client_reconnect, NULL); 510 if (ret != 0) { 511 VHOST_CONFIG_LOG("thread", ERR, "failed to create reconnect thread"); 512 if (pthread_mutex_destroy(&reconn_list.mutex)) 513 VHOST_CONFIG_LOG("thread", ERR, 514 "%s: failed to destroy reconnect mutex", 515 __func__); 516 } 517 518 return ret; 519 } 520 521 static int 522 vhost_user_start_client(struct vhost_user_socket *vsocket) 523 { 524 int ret; 525 int fd = vsocket->socket_fd; 526 const char *path = vsocket->path; 527 struct vhost_user_reconnect *reconn; 528 529 if (!vsocket->async_connect || !vsocket->reconnect) { 530 ret = vhost_user_connect_nonblock(vsocket->path, fd, 531 (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); 532 if (ret == 0) { 533 vhost_user_add_connection(fd, vsocket); 534 return 0; 535 } 536 537 VHOST_CONFIG_LOG(path, WARNING, "failed to connect: %s", strerror(errno)); 538 539 if (ret == -2 || !vsocket->reconnect) { 540 close(fd); 541 return -1; 542 } 543 544 VHOST_CONFIG_LOG(path, INFO, "reconnecting..."); 545 } 546 reconn = malloc(sizeof(*reconn)); 547 if (reconn == NULL) { 548 VHOST_CONFIG_LOG(path, ERR, "failed to allocate memory for reconnect"); 549 close(fd); 550 return -1; 551 } 552 reconn->un = vsocket->un; 553 reconn->fd = fd; 554 reconn->vsocket = vsocket; 555 pthread_mutex_lock(&reconn_list.mutex); 556 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); 557 pthread_mutex_unlock(&reconn_list.mutex); 558 559 return 0; 560 } 561 562 static struct vhost_user_socket * 563 find_vhost_user_socket(const char *path) 564 { 565 int i; 566 567 if (path == NULL) 568 return NULL; 569 570 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 571 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 572 573 if (!strcmp(vsocket->path, path)) 574 return vsocket; 575 } 576 577 return NULL; 578 } 579 580 int 581 rte_vhost_driver_attach_vdpa_device(const char *path, 582 struct rte_vdpa_device *dev) 583 { 584 struct vhost_user_socket *vsocket; 585 586 if (dev == NULL || path == NULL) 587 return -1; 588 589 pthread_mutex_lock(&vhost_user.mutex); 590 vsocket = find_vhost_user_socket(path); 591 if (vsocket) 592 vsocket->vdpa_dev = dev; 593 pthread_mutex_unlock(&vhost_user.mutex); 594 595 return vsocket ? 0 : -1; 596 } 597 598 int 599 rte_vhost_driver_detach_vdpa_device(const char *path) 600 { 601 struct vhost_user_socket *vsocket; 602 603 pthread_mutex_lock(&vhost_user.mutex); 604 vsocket = find_vhost_user_socket(path); 605 if (vsocket) 606 vsocket->vdpa_dev = NULL; 607 pthread_mutex_unlock(&vhost_user.mutex); 608 609 return vsocket ? 0 : -1; 610 } 611 612 struct rte_vdpa_device * 613 rte_vhost_driver_get_vdpa_device(const char *path) 614 { 615 struct vhost_user_socket *vsocket; 616 struct rte_vdpa_device *dev = NULL; 617 618 pthread_mutex_lock(&vhost_user.mutex); 619 vsocket = find_vhost_user_socket(path); 620 if (vsocket) 621 dev = vsocket->vdpa_dev; 622 pthread_mutex_unlock(&vhost_user.mutex); 623 624 return dev; 625 } 626 627 int 628 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type) 629 { 630 struct vhost_user_socket *vsocket; 631 struct rte_vdpa_device *vdpa_dev; 632 int ret = 0; 633 634 pthread_mutex_lock(&vhost_user.mutex); 635 vsocket = find_vhost_user_socket(path); 636 if (!vsocket) { 637 VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet."); 638 ret = -1; 639 goto unlock_exit; 640 } 641 642 vdpa_dev = vsocket->vdpa_dev; 643 if (!vdpa_dev) { 644 ret = -1; 645 goto unlock_exit; 646 } 647 648 *type = vdpa_dev->type; 649 650 unlock_exit: 651 pthread_mutex_unlock(&vhost_user.mutex); 652 return ret; 653 } 654 655 int 656 rte_vhost_driver_disable_features(const char *path, uint64_t features) 657 { 658 struct vhost_user_socket *vsocket; 659 660 pthread_mutex_lock(&vhost_user.mutex); 661 vsocket = find_vhost_user_socket(path); 662 663 /* Note that use_builtin_virtio_net is not affected by this function 664 * since callers may want to selectively disable features of the 665 * built-in vhost net device backend. 666 */ 667 668 if (vsocket) 669 vsocket->features &= ~features; 670 pthread_mutex_unlock(&vhost_user.mutex); 671 672 return vsocket ? 0 : -1; 673 } 674 675 int 676 rte_vhost_driver_enable_features(const char *path, uint64_t features) 677 { 678 struct vhost_user_socket *vsocket; 679 680 pthread_mutex_lock(&vhost_user.mutex); 681 vsocket = find_vhost_user_socket(path); 682 if (vsocket) { 683 if ((vsocket->supported_features & features) != features) { 684 /* 685 * trying to enable features the driver doesn't 686 * support. 687 */ 688 pthread_mutex_unlock(&vhost_user.mutex); 689 return -1; 690 } 691 vsocket->features |= features; 692 } 693 pthread_mutex_unlock(&vhost_user.mutex); 694 695 return vsocket ? 0 : -1; 696 } 697 698 int 699 rte_vhost_driver_set_features(const char *path, uint64_t features) 700 { 701 struct vhost_user_socket *vsocket; 702 703 pthread_mutex_lock(&vhost_user.mutex); 704 vsocket = find_vhost_user_socket(path); 705 if (vsocket) { 706 vsocket->supported_features = features; 707 vsocket->features = features; 708 709 /* Anyone setting feature bits is implementing their own vhost 710 * device backend. 711 */ 712 vsocket->use_builtin_virtio_net = false; 713 } 714 pthread_mutex_unlock(&vhost_user.mutex); 715 716 return vsocket ? 0 : -1; 717 } 718 719 int 720 rte_vhost_driver_get_features(const char *path, uint64_t *features) 721 { 722 struct vhost_user_socket *vsocket; 723 uint64_t vdpa_features; 724 struct rte_vdpa_device *vdpa_dev; 725 int ret = 0; 726 727 pthread_mutex_lock(&vhost_user.mutex); 728 vsocket = find_vhost_user_socket(path); 729 if (!vsocket) { 730 VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet."); 731 ret = -1; 732 goto unlock_exit; 733 } 734 735 vdpa_dev = vsocket->vdpa_dev; 736 if (!vdpa_dev) { 737 *features = vsocket->features; 738 goto unlock_exit; 739 } 740 741 if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) { 742 VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa features for socket file."); 743 ret = -1; 744 goto unlock_exit; 745 } 746 747 *features = vsocket->features & vdpa_features; 748 749 unlock_exit: 750 pthread_mutex_unlock(&vhost_user.mutex); 751 return ret; 752 } 753 754 int 755 rte_vhost_driver_set_protocol_features(const char *path, 756 uint64_t protocol_features) 757 { 758 struct vhost_user_socket *vsocket; 759 760 pthread_mutex_lock(&vhost_user.mutex); 761 vsocket = find_vhost_user_socket(path); 762 if (vsocket) 763 vsocket->protocol_features = protocol_features; 764 pthread_mutex_unlock(&vhost_user.mutex); 765 return vsocket ? 0 : -1; 766 } 767 768 int 769 rte_vhost_driver_get_protocol_features(const char *path, 770 uint64_t *protocol_features) 771 { 772 struct vhost_user_socket *vsocket; 773 uint64_t vdpa_protocol_features; 774 struct rte_vdpa_device *vdpa_dev; 775 int ret = 0; 776 777 pthread_mutex_lock(&vhost_user.mutex); 778 vsocket = find_vhost_user_socket(path); 779 if (!vsocket) { 780 VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet."); 781 ret = -1; 782 goto unlock_exit; 783 } 784 785 vdpa_dev = vsocket->vdpa_dev; 786 if (!vdpa_dev) { 787 *protocol_features = vsocket->protocol_features; 788 goto unlock_exit; 789 } 790 791 if (vdpa_dev->ops->get_protocol_features(vdpa_dev, 792 &vdpa_protocol_features) < 0) { 793 VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa protocol features."); 794 ret = -1; 795 goto unlock_exit; 796 } 797 798 *protocol_features = vsocket->protocol_features 799 & vdpa_protocol_features; 800 801 unlock_exit: 802 pthread_mutex_unlock(&vhost_user.mutex); 803 return ret; 804 } 805 806 int 807 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) 808 { 809 struct vhost_user_socket *vsocket; 810 uint32_t vdpa_queue_num; 811 struct rte_vdpa_device *vdpa_dev; 812 int ret = 0; 813 814 pthread_mutex_lock(&vhost_user.mutex); 815 vsocket = find_vhost_user_socket(path); 816 if (!vsocket) { 817 VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet."); 818 ret = -1; 819 goto unlock_exit; 820 } 821 822 vdpa_dev = vsocket->vdpa_dev; 823 if (!vdpa_dev) { 824 *queue_num = vsocket->max_queue_pairs; 825 goto unlock_exit; 826 } 827 828 if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) { 829 VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa queue number."); 830 ret = -1; 831 goto unlock_exit; 832 } 833 834 *queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num); 835 836 unlock_exit: 837 pthread_mutex_unlock(&vhost_user.mutex); 838 return ret; 839 } 840 841 int 842 rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs) 843 { 844 struct vhost_user_socket *vsocket; 845 int ret = 0; 846 847 VHOST_CONFIG_LOG(path, INFO, "Setting max queue pairs to %u", max_queue_pairs); 848 849 if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) { 850 VHOST_CONFIG_LOG(path, ERR, "Library only supports up to %u queue pairs", 851 VHOST_MAX_QUEUE_PAIRS); 852 return -1; 853 } 854 855 pthread_mutex_lock(&vhost_user.mutex); 856 vsocket = find_vhost_user_socket(path); 857 if (!vsocket) { 858 VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet."); 859 ret = -1; 860 goto unlock_exit; 861 } 862 863 /* 864 * This is only useful for VDUSE for which number of virtqueues is set 865 * by the backend. For Vhost-user, the number of virtqueues is defined 866 * by the frontend. 867 */ 868 if (!vsocket->is_vduse) { 869 VHOST_CONFIG_LOG(path, DEBUG, 870 "Keeping %u max queue pairs for Vhost-user backend", 871 VHOST_MAX_QUEUE_PAIRS); 872 goto unlock_exit; 873 } 874 875 vsocket->max_queue_pairs = max_queue_pairs; 876 877 unlock_exit: 878 pthread_mutex_unlock(&vhost_user.mutex); 879 return ret; 880 } 881 882 static void 883 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) 884 { 885 if (vsocket == NULL) 886 return; 887 888 free(vsocket->path); 889 free(vsocket); 890 } 891 892 /* 893 * Register a new vhost-user socket; here we could act as server 894 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag 895 * is set. 896 */ 897 int 898 rte_vhost_driver_register(const char *path, uint64_t flags) 899 { 900 int ret = -1; 901 struct vhost_user_socket *vsocket; 902 903 if (!path) 904 return -1; 905 906 pthread_mutex_lock(&vhost_user.mutex); 907 908 if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { 909 VHOST_CONFIG_LOG(path, ERR, "the number of vhost sockets reaches maximum"); 910 goto out; 911 } 912 913 vsocket = malloc(sizeof(struct vhost_user_socket)); 914 if (!vsocket) 915 goto out; 916 memset(vsocket, 0, sizeof(struct vhost_user_socket)); 917 vsocket->path = strdup(path); 918 if (vsocket->path == NULL) { 919 VHOST_CONFIG_LOG(path, ERR, "failed to copy socket path string"); 920 vhost_user_socket_mem_free(vsocket); 921 goto out; 922 } 923 TAILQ_INIT(&vsocket->conn_list); 924 ret = pthread_mutex_init(&vsocket->conn_mutex, NULL); 925 if (ret) { 926 VHOST_CONFIG_LOG(path, ERR, "failed to init connection mutex"); 927 goto out_free; 928 } 929 930 if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/"))) 931 vsocket->is_vduse = true; 932 933 vsocket->vdpa_dev = NULL; 934 vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS; 935 vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT; 936 vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT; 937 vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY; 938 vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; 939 vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE; 940 vsocket->async_connect = flags & RTE_VHOST_USER_ASYNC_CONNECT; 941 if (vsocket->is_vduse) 942 vsocket->iommu_support = true; 943 else 944 vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT; 945 946 if (vsocket->async_copy && (vsocket->iommu_support || 947 (flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) { 948 VHOST_CONFIG_LOG(path, ERR, "async copy with IOMMU or post-copy not supported"); 949 goto out_mutex; 950 } 951 952 /* 953 * Set the supported features correctly for the builtin vhost-user 954 * net driver. 955 * 956 * Applications know nothing about features the builtin virtio net 957 * driver (virtio_net.c) supports, thus it's not possible for them 958 * to invoke rte_vhost_driver_set_features(). To workaround it, here 959 * we set it unconditionally. If the application want to implement 960 * another vhost-user driver (say SCSI), it should call the 961 * rte_vhost_driver_set_features(), which will overwrite following 962 * two values. 963 */ 964 vsocket->use_builtin_virtio_net = true; 965 if (vsocket->is_vduse) { 966 vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES; 967 vsocket->features = VDUSE_NET_SUPPORTED_FEATURES; 968 } else { 969 vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES; 970 vsocket->features = VHOST_USER_NET_SUPPORTED_FEATURES; 971 vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES; 972 } 973 974 if (vsocket->async_copy) { 975 vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL); 976 vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL); 977 VHOST_CONFIG_LOG(path, INFO, "logging feature is disabled in async copy mode"); 978 } 979 980 /* 981 * We'll not be able to receive a buffer from guest in linear mode 982 * without external buffer if it will not fit in a single mbuf, which is 983 * likely if segmentation offloading enabled. 984 */ 985 if (vsocket->linearbuf && !vsocket->extbuf) { 986 uint64_t seg_offload_features = 987 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 988 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 989 (1ULL << VIRTIO_NET_F_HOST_UFO); 990 991 VHOST_CONFIG_LOG(path, INFO, "Linear buffers requested without external buffers,"); 992 VHOST_CONFIG_LOG(path, INFO, "disabling host segmentation offloading support"); 993 vsocket->supported_features &= ~seg_offload_features; 994 vsocket->features &= ~seg_offload_features; 995 } 996 997 if (!vsocket->iommu_support) { 998 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 999 vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 1000 } 1001 1002 if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) { 1003 vsocket->protocol_features &= 1004 ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT); 1005 } else { 1006 #ifndef RTE_LIBRTE_VHOST_POSTCOPY 1007 VHOST_CONFIG_LOG(path, ERR, "Postcopy requested but not compiled"); 1008 ret = -1; 1009 goto out_mutex; 1010 #endif 1011 } 1012 1013 if (!vsocket->is_vduse) { 1014 if ((flags & RTE_VHOST_USER_CLIENT) != 0) { 1015 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); 1016 if (vsocket->reconnect && reconn_tid.opaque_id == 0) { 1017 if (vhost_user_reconnect_init() != 0) 1018 goto out_mutex; 1019 } 1020 } else { 1021 vsocket->is_server = true; 1022 } 1023 ret = create_unix_socket(vsocket); 1024 if (ret < 0) 1025 goto out_mutex; 1026 } 1027 1028 vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; 1029 1030 pthread_mutex_unlock(&vhost_user.mutex); 1031 return ret; 1032 1033 out_mutex: 1034 if (pthread_mutex_destroy(&vsocket->conn_mutex)) { 1035 VHOST_CONFIG_LOG(path, ERR, "failed to destroy connection mutex"); 1036 } 1037 out_free: 1038 vhost_user_socket_mem_free(vsocket); 1039 out: 1040 pthread_mutex_unlock(&vhost_user.mutex); 1041 1042 return ret; 1043 } 1044 1045 static bool 1046 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) 1047 { 1048 int found = false; 1049 struct vhost_user_reconnect *reconn, *next; 1050 1051 pthread_mutex_lock(&reconn_list.mutex); 1052 1053 for (reconn = TAILQ_FIRST(&reconn_list.head); 1054 reconn != NULL; reconn = next) { 1055 next = TAILQ_NEXT(reconn, next); 1056 1057 if (reconn->vsocket == vsocket) { 1058 TAILQ_REMOVE(&reconn_list.head, reconn, next); 1059 close(reconn->fd); 1060 free(reconn); 1061 found = true; 1062 break; 1063 } 1064 } 1065 pthread_mutex_unlock(&reconn_list.mutex); 1066 return found; 1067 } 1068 1069 /** 1070 * Unregister the specified vhost socket 1071 */ 1072 int 1073 rte_vhost_driver_unregister(const char *path) 1074 { 1075 int i; 1076 int count; 1077 struct vhost_user_connection *conn, *next; 1078 1079 if (path == NULL) 1080 return -1; 1081 1082 again: 1083 pthread_mutex_lock(&vhost_user.mutex); 1084 1085 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 1086 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 1087 if (strcmp(vsocket->path, path)) 1088 continue; 1089 1090 if (vsocket->is_vduse) { 1091 vduse_device_destroy(path); 1092 } else if (vsocket->is_server) { 1093 /* 1094 * If r/wcb is executing, release vhost_user's 1095 * mutex lock, and try again since the r/wcb 1096 * may use the mutex lock. 1097 */ 1098 if (fdset_try_del(vhost_user.fdset, vsocket->socket_fd) == -1) { 1099 pthread_mutex_unlock(&vhost_user.mutex); 1100 goto again; 1101 } 1102 } else if (vsocket->reconnect) { 1103 vhost_user_remove_reconnect(vsocket); 1104 } 1105 1106 pthread_mutex_lock(&vsocket->conn_mutex); 1107 for (conn = TAILQ_FIRST(&vsocket->conn_list); 1108 conn != NULL; 1109 conn = next) { 1110 next = TAILQ_NEXT(conn, next); 1111 1112 /* 1113 * If r/wcb is executing, release vsocket's 1114 * conn_mutex and vhost_user's mutex locks, and 1115 * try again since the r/wcb may use the 1116 * conn_mutex and mutex locks. 1117 */ 1118 if (fdset_try_del(vhost_user.fdset, 1119 conn->connfd) == -1) { 1120 pthread_mutex_unlock(&vsocket->conn_mutex); 1121 pthread_mutex_unlock(&vhost_user.mutex); 1122 goto again; 1123 } 1124 1125 VHOST_CONFIG_LOG(path, INFO, "free connfd %d", conn->connfd); 1126 close(conn->connfd); 1127 vhost_destroy_device(conn->vid); 1128 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 1129 free(conn); 1130 } 1131 pthread_mutex_unlock(&vsocket->conn_mutex); 1132 1133 if (vsocket->is_server) { 1134 close(vsocket->socket_fd); 1135 unlink(path); 1136 } 1137 1138 pthread_mutex_destroy(&vsocket->conn_mutex); 1139 vhost_user_socket_mem_free(vsocket); 1140 1141 count = --vhost_user.vsocket_cnt; 1142 vhost_user.vsockets[i] = vhost_user.vsockets[count]; 1143 vhost_user.vsockets[count] = NULL; 1144 pthread_mutex_unlock(&vhost_user.mutex); 1145 return 0; 1146 } 1147 pthread_mutex_unlock(&vhost_user.mutex); 1148 1149 return -1; 1150 } 1151 1152 /* 1153 * Register ops so that we can add/remove device to data core. 1154 */ 1155 int 1156 rte_vhost_driver_callback_register(const char *path, 1157 struct rte_vhost_device_ops const * const ops) 1158 { 1159 struct vhost_user_socket *vsocket; 1160 1161 pthread_mutex_lock(&vhost_user.mutex); 1162 vsocket = find_vhost_user_socket(path); 1163 if (vsocket) 1164 vsocket->notify_ops = ops; 1165 pthread_mutex_unlock(&vhost_user.mutex); 1166 1167 return vsocket ? 0 : -1; 1168 } 1169 1170 struct rte_vhost_device_ops const * 1171 vhost_driver_callback_get(const char *path) 1172 { 1173 struct vhost_user_socket *vsocket; 1174 1175 pthread_mutex_lock(&vhost_user.mutex); 1176 vsocket = find_vhost_user_socket(path); 1177 pthread_mutex_unlock(&vhost_user.mutex); 1178 1179 return vsocket ? vsocket->notify_ops : NULL; 1180 } 1181 1182 int 1183 rte_vhost_driver_start(const char *path) 1184 { 1185 struct vhost_user_socket *vsocket; 1186 1187 pthread_mutex_lock(&vhost_user.mutex); 1188 vsocket = find_vhost_user_socket(path); 1189 pthread_mutex_unlock(&vhost_user.mutex); 1190 1191 if (!vsocket) 1192 return -1; 1193 1194 if (vsocket->is_vduse) 1195 return vduse_device_create(path, vsocket->net_compliant_ol_flags); 1196 1197 if (vhost_user.fdset == NULL) { 1198 vhost_user.fdset = fdset_init("vhost-evt"); 1199 if (vhost_user.fdset == NULL) { 1200 VHOST_CONFIG_LOG(path, ERR, "failed to init Vhost-user fdset"); 1201 return -1; 1202 } 1203 } 1204 1205 if (vsocket->is_server) 1206 return vhost_user_start_server(vsocket); 1207 else 1208 return vhost_user_start_client(vsocket); 1209 } 1210