/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>

#include <rte_thread.h>
#include <rte_log.h>

#include "fd_man.h"
#include "vduse.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool is_vduse;
	bool reconnect;
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;
	bool stats_enabled;
	bool async_connect;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits remaining after
	 * rte_vhost_driver_features_disable()/enable() have been applied;
	 * these are also the final feature bits used for vhost-user
	 * feature negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	uint32_t max_queue_pairs;

	struct rte_vdpa_device *vdpa_dev;

	struct rte_vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset *fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
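/*
 * Ownership sketch, derived from the structures above: one
 * vhost_user_socket exists per registered path, and one
 * vhost_user_connection per accepted client fd. Connections are linked
 * on vsocket->conn_list (guarded by conn_mutex) and their fds are
 * polled through the shared vhost_user.fdset.
 */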
/*
 * Return the number of bytes read on success, or a negative value on
 * failure. Update fd_num with the number of fds received.
 */
int
read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		if (ret)
			VHOST_CONFIG_LOG(ifname, ERR, "recvmsg failed on fd %d (%s)",
				sockfd, strerror(errno));
		return ret;
	}

	if (msgh.msg_flags & MSG_TRUNC)
		VHOST_CONFIG_LOG(ifname, ERR, "truncated msg (fd %d)", sockfd);

	/* MSG_CTRUNC may be caused by LSM misconfiguration */
	if (msgh.msg_flags & MSG_CTRUNC)
		VHOST_CONFIG_LOG(ifname, ERR, "truncated control data (fd %d)", sockfd);

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
				(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}

int
send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			VHOST_CONFIG_LOG(ifname, ERR, "cmsg == NULL");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		VHOST_CONFIG_LOG(ifname, ERR, "sendmsg error on fd %d (%s)",
			sockfd, strerror(errno));
		return ret;
	}

	return ret;
}
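/*
 * Usage sketch for the helpers above (hypothetical caller; the buffer
 * and fd array sizes are illustrative only):
 *
 *	char buf[256];
 *	int fds[8], fd_num;
 *	int n = read_fd_message("sock0", connfd, buf, sizeof(buf),
 *			fds, RTE_DIM(fds), &fd_num);
 *	if (n > 0) {
 *		// buf[0..n) holds the message payload,
 *		// fds[0..fd_num) hold fds passed via SCM_RIGHTS;
 *		// the caller owns them and must eventually close() each.
 *	}
 */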
static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_user_new_device();
	if (vid == -1) {
		goto err;
	}

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
		vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
		vsocket->iommu_support);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
		dev = get_device(vid);

		if (dev)
			dev->async_copy = 1;
	}

	VHOST_CONFIG_LOG(vsocket->path, INFO, "new device, handle is %d", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_CONFIG_LOG(vsocket->path, ERR,
				"failed to add vhost user connection with fd %d",
				fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_CONFIG_LOG(vsocket->path, ERR,
			"failed to add fd %d into vhost server fdset",
			fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* Callback invoked when there is a new vhost-user connection from a client. */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	VHOST_CONFIG_LOG(vsocket->path, INFO, "new vhost user connection is %d", fd);
	vhost_user_add_connection(fd, vsocket);
}

static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	VHOST_CONFIG_LOG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d",
		vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		VHOST_CONFIG_LOG(vsocket->path, ERR,
			"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)",
			fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strlcpy(un->sun_path, vsocket->path, sizeof(un->sun_path));

	vsocket->socket_fd = fd;
	return 0;
}
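/*
 * Connection lifecycle as implemented above: in server mode the listen
 * fd fires vhost_user_server_new_connection(), accept() yields a connfd,
 * and vhost_user_add_connection() registers vhost_user_read_cb() for it
 * in the fdset. When vhost_user_msg_handler() fails, the callback tears
 * the device down and, for reconnecting clients, re-arms the connection
 * via create_unix_socket() + vhost_user_start_client().
 */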
static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists. But the library obviously should not delete the file
	 * provided by the user, since we can not be sure that it is not
	 * being used by other applications. Moreover, many applications form
	 * socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to bind: %s; remove it and try again",
			strerror(errno));
		goto err;
	}
	VHOST_CONFIG_LOG(path, INFO, "binding succeeded");

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(vhost_user.fdset, fd, vhost_user_server_new_connection,
			NULL, vsocket);
	if (ret < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to add listen fd %d to vhost server fdset",
			fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static rte_thread_t reconn_tid;

static int
vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		VHOST_CONFIG_LOG(path, ERR, "can't get flags for connfd %d (%s)",
			fd, strerror(errno));
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		VHOST_CONFIG_LOG(path, ERR, "can't disable nonblocking on fd %d", fd);
		return -2;
	}
	return 0;
}
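/*
 * Return convention relied upon by the callers of
 * vhost_user_connect_nonblock() below:
 *
 *	ret == 0	connected: hand the fd to vhost_user_add_connection()
 *	ret == -1	not connected yet: keep the fd and retry later
 *	ret == -2	fatal error: close the fd and drop the reconnect entry
 */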
static uint32_t
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equivalent implementation of TAILQ_FOREACH_SAFE,
		 * which does not exist on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
				reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
					(struct sockaddr *)&reconn->un,
					sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				VHOST_CONFIG_LOG(reconn->vsocket->path, ERR,
					"reconnection for fd %d failed",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			VHOST_CONFIG_LOG(reconn->vsocket->path, INFO, "connected");
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return 0;
}

static int
vhost_user_reconnect_init(void)
{
	int ret;

	pthread_mutex_init(&reconn_list.mutex, NULL);
	TAILQ_INIT(&reconn_list.head);

	ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco",
			vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		VHOST_CONFIG_LOG("thread", ERR, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex))
			VHOST_CONFIG_LOG("thread", ERR,
				"%s: failed to destroy reconnect mutex",
				__func__);
	}

	return ret;
}

static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	if (!vsocket->async_connect || !vsocket->reconnect) {
		ret = vhost_user_connect_nonblock(vsocket->path, fd,
			(struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
		if (ret == 0) {
			vhost_user_add_connection(fd, vsocket);
			return 0;
		}

		VHOST_CONFIG_LOG(path, WARNING, "failed to connect: %s", strerror(errno));

		if (ret == -2 || !vsocket->reconnect) {
			close(fd);
			return -1;
		}

		VHOST_CONFIG_LOG(path, INFO, "reconnecting...");
	}
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_CONFIG_LOG(path, ERR, "failed to allocate memory for reconnect");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

/* Note: callers must hold vhost_user.mutex. */
static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}
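/*
 * Client-mode flow as seen from an application (illustrative; the
 * socket path is hypothetical):
 *
 *	rte_vhost_driver_register("/tmp/sock0", RTE_VHOST_USER_CLIENT);
 *	rte_vhost_driver_start("/tmp/sock0");
 *	// if connect() does not succeed immediately, the entry lands on
 *	// reconn_list and the "vhost-reco" thread retries once per second.
 */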
int
rte_vhost_driver_attach_vdpa_device(const char *path,
	struct rte_vdpa_device *dev)
{
	struct vhost_user_socket *vsocket;

	if (dev == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = NULL;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *dev = NULL;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		dev = vsocket->vdpa_dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return dev;
}

int
rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		ret = -1;
		goto unlock_exit;
	}

	*type = vdpa_dev->type;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * Trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
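/*
 * Illustrative feature toggling (hypothetical application code; the
 * path and feature bit are examples only):
 *
 *	// drop mergeable RX buffers from the negotiated set
 *	rte_vhost_driver_disable_features("/tmp/sock0",
 *			1ULL << VIRTIO_NET_F_MRG_RXBUF);
 *	// re-enabling succeeds only if supported_features still has the bit
 *	rte_vhost_driver_enable_features("/tmp/sock0",
 *			1ULL << VIRTIO_NET_F_MRG_RXBUF);
 */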
int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa features for socket file.");
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_set_protocol_features(const char *path,
	uint64_t protocol_features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->protocol_features = protocol_features;
	pthread_mutex_unlock(&vhost_user.mutex);
	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
	uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
			&vdpa_protocol_features) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa protocol features.");
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = vsocket->max_queue_pairs;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa queue number.");
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}
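/*
 * The getters above share one pattern: with no vDPA device attached
 * they report the socket's own values; with one attached they report
 * the intersection with the device's capabilities, e.g. (sketch,
 * hypothetical path):
 *
 *	uint64_t feat;
 *	if (rte_vhost_driver_get_features("/tmp/sock0", &feat) == 0) {
 *		// feat == vsocket->features & vdpa_features
 *	}
 */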
int
rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs)
{
	struct vhost_user_socket *vsocket;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
		ret = -1;
		goto unlock_exit;
	}

	/*
	 * This is only useful for VDUSE, for which the number of virtqueues is
	 * set by the backend. For Vhost-user, the number of virtqueues is
	 * defined by the frontend.
	 */
	if (!vsocket->is_vduse) {
		VHOST_CONFIG_LOG(path, DEBUG,
			"Keeping %u max queue pairs for Vhost-user backend",
			VHOST_MAX_QUEUE_PAIRS);
		goto unlock_exit;
	}

	VHOST_CONFIG_LOG(path, INFO, "Setting max queue pairs to %u", max_queue_pairs);

	if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) {
		VHOST_CONFIG_LOG(path, ERR, "Library only supports up to %u queue pairs",
			VHOST_MAX_QUEUE_PAIRS);
		ret = -1;
		goto unlock_exit;
	}

	vsocket->max_queue_pairs = max_queue_pairs;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	free(vsocket);
}
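/*
 * VDUSE-only knob (sketch; the device path is hypothetical):
 *
 *	rte_vhost_driver_register("/dev/vduse/net0", 0);
 *	rte_vhost_driver_set_max_queue_num("/dev/vduse/net0", 4);
 *	// for a vhost-user socket the call keeps the default and only
 *	// logs a debug message
 */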
/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when the RTE_VHOST_USER_CLIENT
 * flag is set).
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_CONFIG_LOG(path, ERR, "the number of vhost sockets reaches maximum");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_CONFIG_LOG(path, ERR, "failed to copy socket path string");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	pthread_mutex_init(&vsocket->conn_mutex, NULL);

	if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/")))
		vsocket->is_vduse = true;

	vsocket->vdpa_dev = NULL;
	vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
	vsocket->async_connect = flags & RTE_VHOST_USER_ASYNC_CONNECT;
	if (vsocket->is_vduse)
		vsocket->iommu_support = true;
	else
		vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;

	if (vsocket->async_copy && (vsocket->iommu_support ||
			(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_CONFIG_LOG(path, ERR, "async copy with IOMMU or post-copy not supported");
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about the features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To work around that, we
	 * set it unconditionally here. If an application wants to implement
	 * another vhost-user driver (say SCSI), it should call
	 * rte_vhost_driver_set_features(), which will overwrite the following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	if (vsocket->is_vduse) {
		vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES;
		vsocket->features = VDUSE_NET_SUPPORTED_FEATURES;
	} else {
		vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->features = VHOST_USER_NET_SUPPORTED_FEATURES;
		vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;
	}

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_CONFIG_LOG(path, INFO, "logging feature is disabled in async copy mode");
	}

	/*
	 * We will not be able to receive a buffer from the guest in linear
	 * mode without an external buffer if it does not fit in a single
	 * mbuf, which is likely if segmentation offloading is enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_CONFIG_LOG(path, INFO, "Linear buffers requested without external buffers,");
		VHOST_CONFIG_LOG(path, INFO, "disabling host segmentation offloading support");
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!vsocket->iommu_support) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_CONFIG_LOG(path, ERR, "Postcopy requested but not compiled");
		ret = -1;
		goto out_mutex;
#endif
	}

	if (!vsocket->is_vduse) {
		if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
			vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
			if (vsocket->reconnect && reconn_tid.opaque_id == 0) {
				if (vhost_user_reconnect_init() != 0)
					goto out_mutex;
			}
		} else {
			vsocket->is_server = true;
		}
		ret = create_unix_socket(vsocket);
		if (ret < 0)
			goto out_mutex;
	} else {
		/* VDUSE device creation is deferred to rte_vhost_driver_start(). */
		ret = 0;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_CONFIG_LOG(path, ERR, "failed to destroy connection mutex");
	}
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
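/*
 * Registration sketch (illustrative paths and flags):
 *
 *	// server mode (default): the library binds and listens on the path
 *	rte_vhost_driver_register("/tmp/sock0", 0);
 *
 *	// client mode, retrying until the frontend shows up
 *	rte_vhost_driver_register("/tmp/sock1", RTE_VHOST_USER_CLIENT);
 *
 *	// note: RTE_VHOST_USER_ASYNC_COPY is rejected when combined with
 *	// IOMMU or RTE_VHOST_USER_POSTCOPY_SUPPORT (see above)
 */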
static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	bool found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
			reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_vduse) {
			vduse_device_destroy(path);
		} else if (vsocket->is_server) {
			/*
			 * If the read/write callback is executing, release
			 * vhost_user's mutex lock and try again, since the
			 * callback may itself need that lock.
			 */
			if (fdset_try_del(vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
				conn != NULL;
				conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If the read/write callback is executing, release
			 * vsocket's conn_mutex and vhost_user's mutex locks,
			 * and try again, since the callback may use them.
			 */
			if (fdset_try_del(vhost_user.fdset,
					conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_CONFIG_LOG(path, INFO, "free connfd %d", conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove devices to/from a data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct rte_vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
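/*
 * Minimal ops sketch (hypothetical application callbacks; only the
 * members the application cares about need to be set):
 *
 *	static int app_new_device(int vid) { ... return 0; }
 *	static void app_destroy_device(int vid) { ... }
 *
 *	static const struct rte_vhost_device_ops app_ops = {
 *		.new_device = app_new_device,
 *		.destroy_device = app_destroy_device,
 *	};
 *
 *	rte_vhost_driver_callback_register("/tmp/sock0", &app_ops);
 */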
struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (vsocket->is_vduse)
		return vduse_device_create(path, vsocket->net_compliant_ol_flags);

	if (vhost_user.fdset == NULL) {
		vhost_user.fdset = fdset_init("vhost-evt");
		if (vhost_user.fdset == NULL) {
			VHOST_CONFIG_LOG(path, ERR, "failed to init Vhost-user fdset");
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}
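/*
 * End-to-end usage sketch (illustrative; callbacks should be registered
 * before starting so that new connections find notify_ops in place):
 *
 *	rte_vhost_driver_register("/tmp/sock0", 0);
 *	rte_vhost_driver_callback_register("/tmp/sock0", &app_ops);
 *	rte_vhost_driver_start("/tmp/sock0");
 */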