1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdio.h> 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 #include <string.h> 11 #include <sys/socket.h> 12 #include <sys/un.h> 13 #include <sys/queue.h> 14 #include <errno.h> 15 #include <fcntl.h> 16 17 #include <rte_thread.h> 18 #include <rte_log.h> 19 20 #include "fd_man.h" 21 #include "vduse.h" 22 #include "vhost.h" 23 #include "vhost_user.h" 24 25 26 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); 27 28 /* 29 * Every time rte_vhost_driver_register() is invoked, an associated 30 * vhost_user_socket struct will be created. 31 */ 32 struct vhost_user_socket { 33 struct vhost_user_connection_list conn_list; 34 pthread_mutex_t conn_mutex; 35 char *path; 36 int socket_fd; 37 struct sockaddr_un un; 38 bool is_server; 39 bool is_vduse; 40 bool reconnect; 41 bool iommu_support; 42 bool use_builtin_virtio_net; 43 bool extbuf; 44 bool linearbuf; 45 bool async_copy; 46 bool net_compliant_ol_flags; 47 bool stats_enabled; 48 49 /* 50 * The "supported_features" indicates the feature bits the 51 * vhost driver supports. The "features" indicates the feature 52 * bits after the rte_vhost_driver_features_disable/enable(). 53 * It is also the final feature bits used for vhost-user 54 * features negotiation. 
55 */ 56 uint64_t supported_features; 57 uint64_t features; 58 59 uint64_t protocol_features; 60 61 uint32_t max_queue_pairs; 62 63 struct rte_vdpa_device *vdpa_dev; 64 65 struct rte_vhost_device_ops const *notify_ops; 66 }; 67 68 struct vhost_user_connection { 69 struct vhost_user_socket *vsocket; 70 int connfd; 71 int vid; 72 73 TAILQ_ENTRY(vhost_user_connection) next; 74 }; 75 76 #define MAX_VHOST_SOCKET 1024 77 struct vhost_user { 78 struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; 79 struct fdset fdset; 80 int vsocket_cnt; 81 pthread_mutex_t mutex; 82 }; 83 84 #define MAX_VIRTIO_BACKLOG 128 85 86 static void vhost_user_server_new_connection(int fd, void *data, int *remove); 87 static void vhost_user_read_cb(int fd, void *dat, int *remove); 88 static int create_unix_socket(struct vhost_user_socket *vsocket); 89 static int vhost_user_start_client(struct vhost_user_socket *vsocket); 90 91 static struct vhost_user vhost_user = { 92 .fdset = { 93 .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, 94 .fd_mutex = PTHREAD_MUTEX_INITIALIZER, 95 .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER, 96 .num = 0 97 }, 98 .vsocket_cnt = 0, 99 .mutex = PTHREAD_MUTEX_INITIALIZER, 100 }; 101 102 /* 103 * return bytes# of read on success or negative val on failure. Update fdnum 104 * with number of fds read. 
105 */ 106 int 107 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds, 108 int *fd_num) 109 { 110 struct iovec iov; 111 struct msghdr msgh; 112 char control[CMSG_SPACE(max_fds * sizeof(int))]; 113 struct cmsghdr *cmsg; 114 int got_fds = 0; 115 int ret; 116 117 *fd_num = 0; 118 119 memset(&msgh, 0, sizeof(msgh)); 120 iov.iov_base = buf; 121 iov.iov_len = buflen; 122 123 msgh.msg_iov = &iov; 124 msgh.msg_iovlen = 1; 125 msgh.msg_control = control; 126 msgh.msg_controllen = sizeof(control); 127 128 ret = recvmsg(sockfd, &msgh, 0); 129 if (ret <= 0) { 130 if (ret) 131 VHOST_LOG_CONFIG(ifname, ERR, "recvmsg failed on fd %d (%s)\n", 132 sockfd, strerror(errno)); 133 return ret; 134 } 135 136 if (msgh.msg_flags & MSG_TRUNC) 137 VHOST_LOG_CONFIG(ifname, ERR, "truncated msg (fd %d)\n", sockfd); 138 139 /* MSG_CTRUNC may be caused by LSM misconfiguration */ 140 if (msgh.msg_flags & MSG_CTRUNC) 141 VHOST_LOG_CONFIG(ifname, ERR, "truncated control data (fd %d)\n", sockfd); 142 143 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 144 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 145 if ((cmsg->cmsg_level == SOL_SOCKET) && 146 (cmsg->cmsg_type == SCM_RIGHTS)) { 147 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 148 *fd_num = got_fds; 149 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); 150 break; 151 } 152 } 153 154 /* Clear out unused file descriptors */ 155 while (got_fds < max_fds) 156 fds[got_fds++] = -1; 157 158 return ret; 159 } 160 161 int 162 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num) 163 { 164 165 struct iovec iov; 166 struct msghdr msgh; 167 size_t fdsize = fd_num * sizeof(int); 168 char control[CMSG_SPACE(fdsize)]; 169 struct cmsghdr *cmsg; 170 int ret; 171 172 memset(&msgh, 0, sizeof(msgh)); 173 iov.iov_base = buf; 174 iov.iov_len = buflen; 175 176 msgh.msg_iov = &iov; 177 msgh.msg_iovlen = 1; 178 179 if (fds && fd_num > 0) { 180 msgh.msg_control = control; 181 msgh.msg_controllen = 
sizeof(control); 182 cmsg = CMSG_FIRSTHDR(&msgh); 183 if (cmsg == NULL) { 184 VHOST_LOG_CONFIG(ifname, ERR, "cmsg == NULL\n"); 185 errno = EINVAL; 186 return -1; 187 } 188 cmsg->cmsg_len = CMSG_LEN(fdsize); 189 cmsg->cmsg_level = SOL_SOCKET; 190 cmsg->cmsg_type = SCM_RIGHTS; 191 memcpy(CMSG_DATA(cmsg), fds, fdsize); 192 } else { 193 msgh.msg_control = NULL; 194 msgh.msg_controllen = 0; 195 } 196 197 do { 198 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); 199 } while (ret < 0 && errno == EINTR); 200 201 if (ret < 0) { 202 VHOST_LOG_CONFIG(ifname, ERR, "sendmsg error on fd %d (%s)\n", 203 sockfd, strerror(errno)); 204 return ret; 205 } 206 207 return ret; 208 } 209 210 static void 211 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) 212 { 213 int vid; 214 size_t size; 215 struct vhost_user_connection *conn; 216 int ret; 217 struct virtio_net *dev; 218 219 if (vsocket == NULL) 220 return; 221 222 conn = malloc(sizeof(*conn)); 223 if (conn == NULL) { 224 close(fd); 225 return; 226 } 227 228 vid = vhost_user_new_device(); 229 if (vid == -1) { 230 goto err; 231 } 232 233 size = strnlen(vsocket->path, PATH_MAX); 234 vhost_set_ifname(vid, vsocket->path, size); 235 236 vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net, 237 vsocket->net_compliant_ol_flags, vsocket->stats_enabled, 238 vsocket->iommu_support); 239 240 vhost_attach_vdpa_device(vid, vsocket->vdpa_dev); 241 242 if (vsocket->extbuf) 243 vhost_enable_extbuf(vid); 244 245 if (vsocket->linearbuf) 246 vhost_enable_linearbuf(vid); 247 248 if (vsocket->async_copy) { 249 dev = get_device(vid); 250 251 if (dev) 252 dev->async_copy = 1; 253 } 254 255 VHOST_LOG_CONFIG(vsocket->path, INFO, "new device, handle is %d\n", vid); 256 257 if (vsocket->notify_ops->new_connection) { 258 ret = vsocket->notify_ops->new_connection(vid); 259 if (ret < 0) { 260 VHOST_LOG_CONFIG(vsocket->path, ERR, 261 "failed to add vhost user connection with fd %d\n", 262 fd); 263 goto err_cleanup; 264 } 265 } 266 267 
conn->connfd = fd; 268 conn->vsocket = vsocket; 269 conn->vid = vid; 270 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, 271 NULL, conn); 272 if (ret < 0) { 273 VHOST_LOG_CONFIG(vsocket->path, ERR, 274 "failed to add fd %d into vhost server fdset\n", 275 fd); 276 277 if (vsocket->notify_ops->destroy_connection) 278 vsocket->notify_ops->destroy_connection(conn->vid); 279 280 goto err_cleanup; 281 } 282 283 pthread_mutex_lock(&vsocket->conn_mutex); 284 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); 285 pthread_mutex_unlock(&vsocket->conn_mutex); 286 287 fdset_pipe_notify(&vhost_user.fdset); 288 return; 289 290 err_cleanup: 291 vhost_destroy_device(vid); 292 err: 293 free(conn); 294 close(fd); 295 } 296 297 /* call back when there is new vhost-user connection from client */ 298 static void 299 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) 300 { 301 struct vhost_user_socket *vsocket = dat; 302 303 fd = accept(fd, NULL, NULL); 304 if (fd < 0) 305 return; 306 307 VHOST_LOG_CONFIG(vsocket->path, INFO, "new vhost user connection is %d\n", fd); 308 vhost_user_add_connection(fd, vsocket); 309 } 310 311 static void 312 vhost_user_read_cb(int connfd, void *dat, int *remove) 313 { 314 struct vhost_user_connection *conn = dat; 315 struct vhost_user_socket *vsocket = conn->vsocket; 316 int ret; 317 318 ret = vhost_user_msg_handler(conn->vid, connfd); 319 if (ret < 0) { 320 struct virtio_net *dev = get_device(conn->vid); 321 322 close(connfd); 323 *remove = 1; 324 325 if (dev) 326 vhost_destroy_device_notify(dev); 327 328 if (vsocket->notify_ops->destroy_connection) 329 vsocket->notify_ops->destroy_connection(conn->vid); 330 331 vhost_destroy_device(conn->vid); 332 333 if (vsocket->reconnect) { 334 create_unix_socket(vsocket); 335 vhost_user_start_client(vsocket); 336 } 337 338 pthread_mutex_lock(&vsocket->conn_mutex); 339 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 340 pthread_mutex_unlock(&vsocket->conn_mutex); 341 342 
free(conn); 343 } 344 } 345 346 static int 347 create_unix_socket(struct vhost_user_socket *vsocket) 348 { 349 int fd; 350 struct sockaddr_un *un = &vsocket->un; 351 352 fd = socket(AF_UNIX, SOCK_STREAM, 0); 353 if (fd < 0) 354 return -1; 355 VHOST_LOG_CONFIG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d\n", 356 vsocket->is_server ? "server" : "client", fd); 357 358 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { 359 VHOST_LOG_CONFIG(vsocket->path, ERR, 360 "vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n", 361 fd, strerror(errno)); 362 close(fd); 363 return -1; 364 } 365 366 memset(un, 0, sizeof(*un)); 367 un->sun_family = AF_UNIX; 368 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); 369 un->sun_path[sizeof(un->sun_path) - 1] = '\0'; 370 371 vsocket->socket_fd = fd; 372 return 0; 373 } 374 375 static int 376 vhost_user_start_server(struct vhost_user_socket *vsocket) 377 { 378 int ret; 379 int fd = vsocket->socket_fd; 380 const char *path = vsocket->path; 381 382 /* 383 * bind () may fail if the socket file with the same name already 384 * exists. But the library obviously should not delete the file 385 * provided by the user, since we can not be sure that it is not 386 * being used by other applications. Moreover, many applications form 387 * socket names based on user input, which is prone to errors. 388 * 389 * The user must ensure that the socket does not exist before 390 * registering the vhost driver in server mode. 
391 */ 392 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); 393 if (ret < 0) { 394 VHOST_LOG_CONFIG(path, ERR, "failed to bind: %s; remove it and try again\n", 395 strerror(errno)); 396 goto err; 397 } 398 VHOST_LOG_CONFIG(path, INFO, "binding succeeded\n"); 399 400 ret = listen(fd, MAX_VIRTIO_BACKLOG); 401 if (ret < 0) 402 goto err; 403 404 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, 405 NULL, vsocket); 406 if (ret < 0) { 407 VHOST_LOG_CONFIG(path, ERR, "failed to add listen fd %d to vhost server fdset\n", 408 fd); 409 goto err; 410 } 411 412 return 0; 413 414 err: 415 close(fd); 416 return -1; 417 } 418 419 struct vhost_user_reconnect { 420 struct sockaddr_un un; 421 int fd; 422 struct vhost_user_socket *vsocket; 423 424 TAILQ_ENTRY(vhost_user_reconnect) next; 425 }; 426 427 TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); 428 struct vhost_user_reconnect_list { 429 struct vhost_user_reconnect_tailq_list head; 430 pthread_mutex_t mutex; 431 }; 432 433 static struct vhost_user_reconnect_list reconn_list; 434 static rte_thread_t reconn_tid; 435 436 static int 437 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz) 438 { 439 int ret, flags; 440 441 ret = connect(fd, un, sz); 442 if (ret < 0 && errno != EISCONN) 443 return -1; 444 445 flags = fcntl(fd, F_GETFL, 0); 446 if (flags < 0) { 447 VHOST_LOG_CONFIG(path, ERR, "can't get flags for connfd %d (%s)\n", 448 fd, strerror(errno)); 449 return -2; 450 } 451 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { 452 VHOST_LOG_CONFIG(path, ERR, "can't disable nonblocking on fd %d\n", fd); 453 return -2; 454 } 455 return 0; 456 } 457 458 static uint32_t 459 vhost_user_client_reconnect(void *arg __rte_unused) 460 { 461 int ret; 462 struct vhost_user_reconnect *reconn, *next; 463 464 while (1) { 465 pthread_mutex_lock(&reconn_list.mutex); 466 467 /* 468 * An equal implementation of TAILQ_FOREACH_SAFE, 469 * which 
does not exist on all platforms. 470 */ 471 for (reconn = TAILQ_FIRST(&reconn_list.head); 472 reconn != NULL; reconn = next) { 473 next = TAILQ_NEXT(reconn, next); 474 475 ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd, 476 (struct sockaddr *)&reconn->un, 477 sizeof(reconn->un)); 478 if (ret == -2) { 479 close(reconn->fd); 480 VHOST_LOG_CONFIG(reconn->vsocket->path, ERR, 481 "reconnection for fd %d failed\n", 482 reconn->fd); 483 goto remove_fd; 484 } 485 if (ret == -1) 486 continue; 487 488 VHOST_LOG_CONFIG(reconn->vsocket->path, INFO, "connected\n"); 489 vhost_user_add_connection(reconn->fd, reconn->vsocket); 490 remove_fd: 491 TAILQ_REMOVE(&reconn_list.head, reconn, next); 492 free(reconn); 493 } 494 495 pthread_mutex_unlock(&reconn_list.mutex); 496 sleep(1); 497 } 498 499 return 0; 500 } 501 502 static int 503 vhost_user_reconnect_init(void) 504 { 505 int ret; 506 507 ret = pthread_mutex_init(&reconn_list.mutex, NULL); 508 if (ret < 0) { 509 VHOST_LOG_CONFIG("thread", ERR, "%s: failed to initialize mutex\n", __func__); 510 return ret; 511 } 512 TAILQ_INIT(&reconn_list.head); 513 514 ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco", 515 vhost_user_client_reconnect, NULL); 516 if (ret != 0) { 517 VHOST_LOG_CONFIG("thread", ERR, "failed to create reconnect thread\n"); 518 if (pthread_mutex_destroy(&reconn_list.mutex)) 519 VHOST_LOG_CONFIG("thread", ERR, 520 "%s: failed to destroy reconnect mutex\n", 521 __func__); 522 } 523 524 return ret; 525 } 526 527 static int 528 vhost_user_start_client(struct vhost_user_socket *vsocket) 529 { 530 int ret; 531 int fd = vsocket->socket_fd; 532 const char *path = vsocket->path; 533 struct vhost_user_reconnect *reconn; 534 535 ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un, 536 sizeof(vsocket->un)); 537 if (ret == 0) { 538 vhost_user_add_connection(fd, vsocket); 539 return 0; 540 } 541 542 VHOST_LOG_CONFIG(path, WARNING, "failed to connect: %s\n", 
strerror(errno)); 543 544 if (ret == -2 || !vsocket->reconnect) { 545 close(fd); 546 return -1; 547 } 548 549 VHOST_LOG_CONFIG(path, INFO, "reconnecting...\n"); 550 reconn = malloc(sizeof(*reconn)); 551 if (reconn == NULL) { 552 VHOST_LOG_CONFIG(path, ERR, "failed to allocate memory for reconnect\n"); 553 close(fd); 554 return -1; 555 } 556 reconn->un = vsocket->un; 557 reconn->fd = fd; 558 reconn->vsocket = vsocket; 559 pthread_mutex_lock(&reconn_list.mutex); 560 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); 561 pthread_mutex_unlock(&reconn_list.mutex); 562 563 return 0; 564 } 565 566 static struct vhost_user_socket * 567 find_vhost_user_socket(const char *path) 568 { 569 int i; 570 571 if (path == NULL) 572 return NULL; 573 574 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 575 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 576 577 if (!strcmp(vsocket->path, path)) 578 return vsocket; 579 } 580 581 return NULL; 582 } 583 584 int 585 rte_vhost_driver_attach_vdpa_device(const char *path, 586 struct rte_vdpa_device *dev) 587 { 588 struct vhost_user_socket *vsocket; 589 590 if (dev == NULL || path == NULL) 591 return -1; 592 593 pthread_mutex_lock(&vhost_user.mutex); 594 vsocket = find_vhost_user_socket(path); 595 if (vsocket) 596 vsocket->vdpa_dev = dev; 597 pthread_mutex_unlock(&vhost_user.mutex); 598 599 return vsocket ? 0 : -1; 600 } 601 602 int 603 rte_vhost_driver_detach_vdpa_device(const char *path) 604 { 605 struct vhost_user_socket *vsocket; 606 607 pthread_mutex_lock(&vhost_user.mutex); 608 vsocket = find_vhost_user_socket(path); 609 if (vsocket) 610 vsocket->vdpa_dev = NULL; 611 pthread_mutex_unlock(&vhost_user.mutex); 612 613 return vsocket ? 
0 : -1; 614 } 615 616 struct rte_vdpa_device * 617 rte_vhost_driver_get_vdpa_device(const char *path) 618 { 619 struct vhost_user_socket *vsocket; 620 struct rte_vdpa_device *dev = NULL; 621 622 pthread_mutex_lock(&vhost_user.mutex); 623 vsocket = find_vhost_user_socket(path); 624 if (vsocket) 625 dev = vsocket->vdpa_dev; 626 pthread_mutex_unlock(&vhost_user.mutex); 627 628 return dev; 629 } 630 631 int 632 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type) 633 { 634 struct vhost_user_socket *vsocket; 635 struct rte_vdpa_device *vdpa_dev; 636 int ret = 0; 637 638 pthread_mutex_lock(&vhost_user.mutex); 639 vsocket = find_vhost_user_socket(path); 640 if (!vsocket) { 641 VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n"); 642 ret = -1; 643 goto unlock_exit; 644 } 645 646 vdpa_dev = vsocket->vdpa_dev; 647 if (!vdpa_dev) { 648 ret = -1; 649 goto unlock_exit; 650 } 651 652 *type = vdpa_dev->type; 653 654 unlock_exit: 655 pthread_mutex_unlock(&vhost_user.mutex); 656 return ret; 657 } 658 659 int 660 rte_vhost_driver_disable_features(const char *path, uint64_t features) 661 { 662 struct vhost_user_socket *vsocket; 663 664 pthread_mutex_lock(&vhost_user.mutex); 665 vsocket = find_vhost_user_socket(path); 666 667 /* Note that use_builtin_virtio_net is not affected by this function 668 * since callers may want to selectively disable features of the 669 * built-in vhost net device backend. 670 */ 671 672 if (vsocket) 673 vsocket->features &= ~features; 674 pthread_mutex_unlock(&vhost_user.mutex); 675 676 return vsocket ? 0 : -1; 677 } 678 679 int 680 rte_vhost_driver_enable_features(const char *path, uint64_t features) 681 { 682 struct vhost_user_socket *vsocket; 683 684 pthread_mutex_lock(&vhost_user.mutex); 685 vsocket = find_vhost_user_socket(path); 686 if (vsocket) { 687 if ((vsocket->supported_features & features) != features) { 688 /* 689 * trying to enable features the driver doesn't 690 * support. 
691 */ 692 pthread_mutex_unlock(&vhost_user.mutex); 693 return -1; 694 } 695 vsocket->features |= features; 696 } 697 pthread_mutex_unlock(&vhost_user.mutex); 698 699 return vsocket ? 0 : -1; 700 } 701 702 int 703 rte_vhost_driver_set_features(const char *path, uint64_t features) 704 { 705 struct vhost_user_socket *vsocket; 706 707 pthread_mutex_lock(&vhost_user.mutex); 708 vsocket = find_vhost_user_socket(path); 709 if (vsocket) { 710 vsocket->supported_features = features; 711 vsocket->features = features; 712 713 /* Anyone setting feature bits is implementing their own vhost 714 * device backend. 715 */ 716 vsocket->use_builtin_virtio_net = false; 717 } 718 pthread_mutex_unlock(&vhost_user.mutex); 719 720 return vsocket ? 0 : -1; 721 } 722 723 int 724 rte_vhost_driver_get_features(const char *path, uint64_t *features) 725 { 726 struct vhost_user_socket *vsocket; 727 uint64_t vdpa_features; 728 struct rte_vdpa_device *vdpa_dev; 729 int ret = 0; 730 731 pthread_mutex_lock(&vhost_user.mutex); 732 vsocket = find_vhost_user_socket(path); 733 if (!vsocket) { 734 VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n"); 735 ret = -1; 736 goto unlock_exit; 737 } 738 739 vdpa_dev = vsocket->vdpa_dev; 740 if (!vdpa_dev) { 741 *features = vsocket->features; 742 goto unlock_exit; 743 } 744 745 if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) { 746 VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa features for socket file.\n"); 747 ret = -1; 748 goto unlock_exit; 749 } 750 751 *features = vsocket->features & vdpa_features; 752 753 unlock_exit: 754 pthread_mutex_unlock(&vhost_user.mutex); 755 return ret; 756 } 757 758 int 759 rte_vhost_driver_set_protocol_features(const char *path, 760 uint64_t protocol_features) 761 { 762 struct vhost_user_socket *vsocket; 763 764 pthread_mutex_lock(&vhost_user.mutex); 765 vsocket = find_vhost_user_socket(path); 766 if (vsocket) 767 vsocket->protocol_features = protocol_features; 768 
pthread_mutex_unlock(&vhost_user.mutex); 769 return vsocket ? 0 : -1; 770 } 771 772 int 773 rte_vhost_driver_get_protocol_features(const char *path, 774 uint64_t *protocol_features) 775 { 776 struct vhost_user_socket *vsocket; 777 uint64_t vdpa_protocol_features; 778 struct rte_vdpa_device *vdpa_dev; 779 int ret = 0; 780 781 pthread_mutex_lock(&vhost_user.mutex); 782 vsocket = find_vhost_user_socket(path); 783 if (!vsocket) { 784 VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n"); 785 ret = -1; 786 goto unlock_exit; 787 } 788 789 vdpa_dev = vsocket->vdpa_dev; 790 if (!vdpa_dev) { 791 *protocol_features = vsocket->protocol_features; 792 goto unlock_exit; 793 } 794 795 if (vdpa_dev->ops->get_protocol_features(vdpa_dev, 796 &vdpa_protocol_features) < 0) { 797 VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa protocol features.\n"); 798 ret = -1; 799 goto unlock_exit; 800 } 801 802 *protocol_features = vsocket->protocol_features 803 & vdpa_protocol_features; 804 805 unlock_exit: 806 pthread_mutex_unlock(&vhost_user.mutex); 807 return ret; 808 } 809 810 int 811 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) 812 { 813 struct vhost_user_socket *vsocket; 814 uint32_t vdpa_queue_num; 815 struct rte_vdpa_device *vdpa_dev; 816 int ret = 0; 817 818 pthread_mutex_lock(&vhost_user.mutex); 819 vsocket = find_vhost_user_socket(path); 820 if (!vsocket) { 821 VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n"); 822 ret = -1; 823 goto unlock_exit; 824 } 825 826 vdpa_dev = vsocket->vdpa_dev; 827 if (!vdpa_dev) { 828 *queue_num = vsocket->max_queue_pairs; 829 goto unlock_exit; 830 } 831 832 if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) { 833 VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa queue number.\n"); 834 ret = -1; 835 goto unlock_exit; 836 } 837 838 *queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num); 839 840 unlock_exit: 841 pthread_mutex_unlock(&vhost_user.mutex); 842 return ret; 843 } 844 845 
/* Set the maximum number of queue pairs advertised for the socket at path. */
int
rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs)
{
	struct vhost_user_socket *vsocket;
	int ret = 0;

	VHOST_LOG_CONFIG(path, INFO, "Setting max queue pairs to %u\n", max_queue_pairs);

	if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) {
		VHOST_LOG_CONFIG(path, ERR, "Library only supports up to %u queue pairs\n",
			VHOST_MAX_QUEUE_PAIRS);
		return -1;
	}

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
		ret = -1;
		goto unlock_exit;
	}

	vsocket->max_queue_pairs = max_queue_pairs;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

/* Free a vhost_user_socket and its owned path string. NULL-safe. */
static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	free(vsocket);
}

/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
 * is set.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_LOG_CONFIG(path, ERR, "the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_LOG_CONFIG(path, ERR, "failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_LOG_CONFIG(path, ERR, "failed to init connection mutex\n");
		goto out_free;
	}

	/* Paths under /dev/vduse/ select the in-kernel VDUSE backend
	 * instead of a vhost-user unix socket.
	 */
	if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/")))
		vsocket->is_vduse = true;

	vsocket->vdpa_dev = NULL;
	vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
	if (vsocket->is_vduse)
		vsocket->iommu_support = true;
	else
		vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;

	if (vsocket->async_copy && (vsocket->iommu_support ||
				(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_LOG_CONFIG(path, ERR, "async copy with IOMMU or post-copy not supported\n");
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
	 * we set it unconditionally. If the application want to implement
	 * another vhost-user driver (say SCSI), it should call the
	 * rte_vhost_driver_set_features(), which will overwrite following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	if (vsocket->is_vduse) {
		vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES;
		vsocket->features = VDUSE_NET_SUPPORTED_FEATURES;
	} else {
		vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;
	}

	if (vsocket->async_copy) {
		/* Dirty-page logging is incompatible with async copy. */
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_LOG_CONFIG(path, INFO, "logging feature is disabled in async copy mode\n");
	}

	/*
	 * We'll not be able to receive a buffer from guest in linear mode
	 * without external buffer if it will not fit in a single mbuf, which is
	 * likely if segmentation offloading enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_LOG_CONFIG(path, INFO, "Linear buffers requested without external buffers,\n");
		VHOST_LOG_CONFIG(path, INFO, "disabling host segmentation offloading support\n");
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!vsocket->iommu_support) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_LOG_CONFIG(path, ERR, "Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if (!vsocket->is_vduse) {
		if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
			vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
			/* Lazily spawn the reconnect thread on first use. */
			if (vsocket->reconnect && reconn_tid.opaque_id == 0) {
				if (vhost_user_reconnect_init() != 0)
					goto out_mutex;
			}
		} else {
			vsocket->is_server = true;
		}
		ret = create_unix_socket(vsocket);
		if (ret < 0)
			goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_LOG_CONFIG(path, ERR, "failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}

/*
 * Remove any pending reconnect entry for vsocket (closing its fd).
 * Returns true if an entry was found and removed.
 */
static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	int found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_vduse) {
			vduse_device_destroy(path);
		} else if (vsocket->is_server) {
			/*
			 * If r/wcb is executing, release vhost_user's
			 * mutex lock, and try again since the r/wcb
			 * may use the mutex lock.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
		     conn != NULL;
		     conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If r/wcb is executing, release vsocket's
			 * conn_mutex and vhost_user's mutex locks, and
			 * try again since the r/wcb may use the
			 * conn_mutex and mutex locks.
			 */
			if (fdset_try_del(&vhost_user.fdset,
					  conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_LOG_CONFIG(path, INFO, "free connfd %d\n", conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		/* Compact the registry: move the last entry into the hole. */
		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove device to data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct rte_vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Return the device ops registered for path, or NULL if unregistered. */
struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

/*
 * Start the previously registered socket: create the VDUSE device,
 * or start server/client operation (lazily spawning the shared
 * fdset event-dispatch thread on first use).
 */
int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static rte_thread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (vsocket->is_vduse)
		return vduse_device_create(path, vsocket->net_compliant_ol_flags);

	if (fdset_tid.opaque_id == 0) {
		/**
		 * create a pipe which will be waited by poll and notified to
		 * rebuild the wait list of poll.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_LOG_CONFIG(path, ERR, "failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_thread_create_internal_control(&fdset_tid,
				"vhost-evt", fdset_event_dispatch, &vhost_user.fdset);
		if (ret != 0) {
			VHOST_LOG_CONFIG(path, ERR, "failed to create fdset handling thread\n");
			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}