/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>

#include <rte_thread.h>
#include <rte_log.h>

#include "fd_man.h"
#include "vduse.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool is_vduse;
	bool reconnect;
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;
	bool stats_enabled;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits left after
	 * rte_vhost_driver_disable_features()/enable_features() have
	 * been applied; it is also the final set used for vhost-user
	 * feature negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	uint32_t max_queue_pairs;

	struct rte_vdpa_device *vdpa_dev;

	struct rte_vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.sync_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * Return the number of bytes read on success, or a negative value on
 * failure. *fd_num is updated with the number of file descriptors
 * received.
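 *
 * File descriptors arrive as SCM_RIGHTS ancillary data; any fds[]
 * slots beyond the number actually received are set to -1.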
 */
int
read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		if (ret)
			VHOST_CONFIG_LOG(ifname, ERR, "recvmsg failed on fd %d (%s)",
					sockfd, strerror(errno));
		return ret;
	}

	if (msgh.msg_flags & MSG_TRUNC)
		VHOST_CONFIG_LOG(ifname, ERR, "truncated msg (fd %d)", sockfd);

	/* MSG_CTRUNC may be caused by LSM misconfiguration */
	if (msgh.msg_flags & MSG_CTRUNC)
		VHOST_CONFIG_LOG(ifname, ERR, "truncated control data (fd %d)", sockfd);

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
				(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}

int
send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			VHOST_CONFIG_LOG(ifname, ERR, "cmsg == NULL");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		VHOST_CONFIG_LOG(ifname, ERR, "sendmsg error on fd %d (%s)",
				sockfd, strerror(errno));
		return ret;
	}

	return ret;
}

static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_user_new_device();
	if (vid == -1)
		goto err;

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
			vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
			vsocket->iommu_support);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
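		/*
		 * Mark the new device so that the data path may use
		 * DMA-accelerated (async) copies.
		 */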
		dev = get_device(vid);
		if (dev)
			dev->async_copy = 1;
	}

	VHOST_CONFIG_LOG(vsocket->path, INFO, "new device, handle is %d", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_CONFIG_LOG(vsocket->path, ERR,
					"failed to add vhost user connection with fd %d",
					fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_CONFIG_LOG(vsocket->path, ERR,
				"failed to add fd %d into vhost server fdset",
				fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* callback invoked when there is a new vhost-user connection from a client */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	VHOST_CONFIG_LOG(vsocket->path, INFO, "new vhost user connection is %d", fd);
	vhost_user_add_connection(fd, vsocket);
}

static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	VHOST_CONFIG_LOG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d",
			vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		VHOST_CONFIG_LOG(vsocket->path, ERR,
				"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)",
				fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists.
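	 * (In that case bind() typically sets errno to EADDRINUSE.)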
	 * But the library obviously should not delete the file provided
	 * by the user, since we cannot be sure that it is not being used
	 * by other applications. Moreover, many applications form socket
	 * names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to bind: %s; remove it and try again",
				strerror(errno));
		goto err;
	}
	VHOST_CONFIG_LOG(path, INFO, "binding succeeded");

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
			NULL, vsocket);
	if (ret < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to add listen fd %d to vhost server fdset",
				fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static rte_thread_t reconn_tid;

static int
vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		VHOST_CONFIG_LOG(path, ERR, "can't get flags for connfd %d (%s)",
				fd, strerror(errno));
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		VHOST_CONFIG_LOG(path, ERR, "can't disable nonblocking on fd %d", fd);
		return -2;
	}
	return 0;
}

static uint32_t
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equivalent implementation of TAILQ_FOREACH_SAFE,
		 * which is not available on all platforms.
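		 * The next element is saved up front so that the current
		 * one can safely be removed and freed while iterating.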
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
				reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
					(struct sockaddr *)&reconn->un,
					sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				VHOST_CONFIG_LOG(reconn->vsocket->path, ERR,
						"reconnection for fd %d failed",
						reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			VHOST_CONFIG_LOG(reconn->vsocket->path, INFO, "connected");
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return 0;
}

static int
vhost_user_reconnect_init(void)
{
	int ret;

	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	if (ret < 0) {
		VHOST_CONFIG_LOG("thread", ERR, "%s: failed to initialize mutex", __func__);
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco",
			vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		VHOST_CONFIG_LOG("thread", ERR, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex))
			VHOST_CONFIG_LOG("thread", ERR,
					"%s: failed to destroy reconnect mutex",
					__func__);
	}

	return ret;
}

static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
			sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	VHOST_CONFIG_LOG(path, WARNING, "failed to connect: %s", strerror(errno));

	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	VHOST_CONFIG_LOG(path, INFO, "reconnecting...");
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_CONFIG_LOG(path, ERR, "failed to allocate memory for reconnect");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

int
rte_vhost_driver_attach_vdpa_device(const char *path,
		struct rte_vdpa_device *dev)
{
	struct vhost_user_socket *vsocket;

	if (dev == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
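
/*
 * A minimal usage sketch (the vDPA device name below is illustrative;
 * a real one comes from the probed vDPA driver):
 *
 *	struct rte_vdpa_device *vdpa =
 *		rte_vdpa_find_device_by_name("0000:01:00.0");
 *
 *	if (vdpa != NULL)
 *		rte_vhost_driver_attach_vdpa_device("/tmp/vhost.sock", vdpa);
 */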

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = NULL;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *dev = NULL;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		dev = vsocket->vdpa_dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return dev;
}

int
rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		ret = -1;
		goto unlock_exit;
	}

	*type = vdpa_dev->type;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * Trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
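
/*
 * A minimal sketch of how a custom backend might use the feature API
 * above (the socket path and feature mask are illustrative only):
 *
 *	uint64_t my_features = (1ULL << VIRTIO_F_VERSION_1) |
 *			(1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
 *
 *	rte_vhost_driver_set_features("/tmp/vhost.sock", my_features);
 */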

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa features for socket file.");
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_set_protocol_features(const char *path,
		uint64_t protocol_features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->protocol_features = protocol_features;
	pthread_mutex_unlock(&vhost_user.mutex);
	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
			&vdpa_protocol_features) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa protocol features.");
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = vsocket->max_queue_pairs;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa queue number.");
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs)
{
	struct vhost_user_socket *vsocket;
	int ret = 0;

	VHOST_CONFIG_LOG(path, INFO, "Setting max queue pairs to %u", max_queue_pairs);

	if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) {
		VHOST_CONFIG_LOG(path, ERR, "Library only supports up to %u queue pairs",
				VHOST_MAX_QUEUE_PAIRS);
		return -1;
	}

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vsocket->max_queue_pairs = max_queue_pairs;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	free(vsocket);
}

/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when the RTE_VHOST_USER_CLIENT
 * flag is set).
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_CONFIG_LOG(path, ERR, "the number of vhost sockets reaches maximum");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_CONFIG_LOG(path, ERR, "failed to copy socket path string");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_CONFIG_LOG(path, ERR, "failed to init connection mutex");
		goto out_free;
	}

	if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/")))
		vsocket->is_vduse = true;

	vsocket->vdpa_dev = NULL;
	vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
	if (vsocket->is_vduse)
		vsocket->iommu_support = true;
	else
		vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;

	if (vsocket->async_copy && (vsocket->iommu_support ||
			(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_CONFIG_LOG(path, ERR, "async copy with IOMMU or post-copy not supported");
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about the features the builtin virtio
	 * net driver (virtio_net.c) supports, so they cannot be expected
	 * to invoke rte_vhost_driver_set_features(). To work around this,
	 * we set them unconditionally here. If an application wants to
	 * implement another vhost-user backend (say, SCSI), it should call
	 * rte_vhost_driver_set_features(), which will overwrite the
	 * following two values.
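	 *
	 * (rte_vhost_driver_set_features() also clears
	 * use_builtin_virtio_net, so the builtin net backend is bypassed.)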
	 */
	vsocket->use_builtin_virtio_net = true;
	if (vsocket->is_vduse) {
		vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES;
		vsocket->features = VDUSE_NET_SUPPORTED_FEATURES;
	} else {
		vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;
	}

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_CONFIG_LOG(path, INFO, "logging feature is disabled in async copy mode");
	}

	/*
	 * In linear-buffer mode without external buffers, we cannot
	 * receive a buffer from the guest if it does not fit in a single
	 * mbuf, which is likely when segmentation offloading is enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_CONFIG_LOG(path, INFO, "Linear buffers requested without external buffers,");
		VHOST_CONFIG_LOG(path, INFO, "disabling host segmentation offloading support");
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!vsocket->iommu_support) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_CONFIG_LOG(path, ERR, "Postcopy requested but not compiled");
		ret = -1;
		goto out_mutex;
#endif
	}

	if (!vsocket->is_vduse) {
		if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
			vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
			if (vsocket->reconnect && reconn_tid.opaque_id == 0) {
				if (vhost_user_reconnect_init() != 0)
					goto out_mutex;
			}
		} else {
			vsocket->is_server = true;
		}
		ret = create_unix_socket(vsocket);
		if (ret < 0)
			goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_CONFIG_LOG(path, ERR, "failed to destroy connection mutex");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}

static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	bool found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
			reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;
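
	/*
	 * The search below may be restarted: if one of this socket's fds
	 * has a read/write callback executing, all locks are dropped and
	 * the whole lookup is retried from scratch.
	 */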
again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_vduse) {
			vduse_device_destroy(path);
		} else if (vsocket->is_server) {
			/*
			 * If the read/write callback is executing, release
			 * vhost_user's mutex and try again, since the
			 * callback may take that mutex.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
				conn != NULL;
				conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If the read/write callback is executing, release
			 * vsocket's conn_mutex and vhost_user's mutex and
			 * try again, since the callback may take both locks.
			 */
			if (fdset_try_del(&vhost_user.fdset,
						conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_CONFIG_LOG(path, INFO, "free connfd %d", conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove devices to/from a data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
		struct rte_vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static rte_thread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (vsocket->is_vduse)
		return vduse_device_create(path, vsocket->net_compliant_ol_flags);

	if (fdset_tid.opaque_id == 0) {
		/*
		 * Create a pipe that will be waited on by poll() and
		 * notified in order to rebuild poll's wait list.
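		 * (Writing to the pipe is what wakes the dispatch thread
		 * out of poll() so that fd additions take effect.)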
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_CONFIG_LOG(path, ERR, "failed to create pipe for vhost fdset");
			return -1;
		}

		int ret = rte_thread_create_internal_control(&fdset_tid,
				"vhost-evt", fdset_event_dispatch, &vhost_user.fdset);
		if (ret != 0) {
			VHOST_CONFIG_LOG(path, ERR, "failed to create fdset handling thread");
			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}
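
/*
 * A typical bring-up sequence for an application using this API, as a
 * sketch (the socket path and the app_* callbacks are illustrative):
 *
 *	static const struct rte_vhost_device_ops ops = {
 *		.new_device = app_new_device,
 *		.destroy_device = app_destroy_device,
 *	};
 *
 *	rte_vhost_driver_register("/tmp/vhost.sock", 0);
 *	rte_vhost_driver_callback_register("/tmp/vhost.sock", &ops);
 *	rte_vhost_driver_start("/tmp/vhost.sock");
 */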