1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. 3 * All rights reserved. 4 */ 5 6 #include "spdk/stdinc.h" 7 8 #include <sys/eventfd.h> 9 10 #include "spdk/string.h" 11 #include "spdk/config.h" 12 #include "spdk/util.h" 13 14 #include "spdk_internal/virtio.h" 15 #include "spdk_internal/vhost_user.h" 16 17 /* The version of the protocol we support */ 18 #define VHOST_USER_VERSION 0x1 19 20 #define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \ 21 ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ 22 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) 23 24 struct virtio_user_dev { 25 int vhostfd; 26 27 int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; 28 int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; 29 uint32_t queue_size; 30 31 uint8_t status; 32 char path[PATH_MAX]; 33 uint64_t protocol_features; 34 struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES]; 35 struct spdk_mem_map *mem_map; 36 }; 37 38 static int 39 vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) 40 { 41 int r; 42 struct msghdr msgh; 43 struct iovec iov; 44 size_t fd_size = fd_num * sizeof(int); 45 char control[CMSG_SPACE(fd_size)]; 46 struct cmsghdr *cmsg; 47 48 memset(&msgh, 0, sizeof(msgh)); 49 memset(control, 0, sizeof(control)); 50 51 iov.iov_base = (uint8_t *)buf; 52 iov.iov_len = len; 53 54 msgh.msg_iov = &iov; 55 msgh.msg_iovlen = 1; 56 57 if (fds && fd_num > 0) { 58 msgh.msg_control = control; 59 msgh.msg_controllen = sizeof(control); 60 cmsg = CMSG_FIRSTHDR(&msgh); 61 cmsg->cmsg_len = CMSG_LEN(fd_size); 62 cmsg->cmsg_level = SOL_SOCKET; 63 cmsg->cmsg_type = SCM_RIGHTS; 64 memcpy(CMSG_DATA(cmsg), fds, fd_size); 65 } else { 66 msgh.msg_control = NULL; 67 msgh.msg_controllen = 0; 68 } 69 70 do { 71 r = sendmsg(fd, &msgh, 0); 72 } while (r < 0 && errno == EINTR); 73 74 if (r == -1) { 75 return -errno; 76 } 77 78 return 0; 79 } 80 81 static int 82 vhost_user_read(int fd, struct vhost_user_msg *msg) 83 { 84 uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; 85 ssize_t ret; 86 size_t sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; 87 88 ret = recv(fd, (void *)msg, sz_hdr, 0); 89 if ((size_t)ret != sz_hdr) { 90 SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n", 91 ret, sz_hdr); 92 if (ret == -1) { 93 return -errno; 94 } else { 95 return -EBUSY; 96 } 97 } 98 99 /* validate msg flags */ 100 if (msg->flags != (valid_flags)) { 101 SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n", 102 msg->flags, valid_flags); 103 return -EIO; 104 } 105 106 sz_payload = msg->size; 107 108 if (sz_payload > VHOST_USER_PAYLOAD_SIZE) { 109 SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n", 110 sz_payload, VHOST_USER_PAYLOAD_SIZE); 111 return -EIO; 112 } 113 114 if (sz_payload) { 115 ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); 116 if ((size_t)ret != sz_payload) { 117 SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n", 118 ret, msg->size); 119 if (ret == -1) { 120 return -errno; 121 } else { 122 return -EBUSY; 123 } 124 } 125 } 126 127 return 0; 128 } 129 130 struct hugepage_file_info { 131 uint64_t addr; /**< virtual addr */ 132 size_t size; /**< the file size */ 133 char path[PATH_MAX]; /**< path to backing file */ 134 }; 135 136 /* Two possible options: 137 * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file 138 * array. This is simple but cannot be used in secondary process because 139 * secondary process will close and munmap that file. 140 * 2. Match HUGEFILE_FMT to find hugepage files directly. 141 * 142 * We choose option 2. 143 */ 144 static int 145 get_hugepage_file_info(struct hugepage_file_info hugepages[], int max) 146 { 147 int idx, rc; 148 FILE *f; 149 char buf[BUFSIZ], *tmp, *tail; 150 char *str_underline, *str_start; 151 int huge_index; 152 uint64_t v_start, v_end; 153 154 f = fopen("/proc/self/maps", "r"); 155 if (!f) { 156 SPDK_ERRLOG("cannot open /proc/self/maps\n"); 157 rc = -errno; 158 assert(rc < 0); /* scan-build hack */ 159 return rc; 160 } 161 162 idx = 0; 163 while (fgets(buf, sizeof(buf), f) != NULL) { 164 if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { 165 SPDK_ERRLOG("Failed to parse address\n"); 166 rc = -EIO; 167 goto out; 168 } 169 170 tmp = strchr(buf, ' ') + 1; /** skip address */ 171 tmp = strchr(tmp, ' ') + 1; /** skip perm */ 172 tmp = strchr(tmp, ' ') + 1; /** skip offset */ 173 tmp = strchr(tmp, ' ') + 1; /** skip dev */ 174 tmp = strchr(tmp, ' ') + 1; /** skip inode */ 175 while (*tmp == ' ') { /** skip spaces */ 176 tmp++; 177 } 178 tail = strrchr(tmp, '\n'); /** remove newline if exists */ 179 if (tail) { 180 *tail = '\0'; 181 } 182 183 /* Match HUGEFILE_FMT, aka "%s/%smap_%d", 184 * which is defined in eal_filesystem.h 185 */ 186 str_underline = strrchr(tmp, '_'); 187 if (!str_underline) { 188 continue; 189 } 190 191 str_start = str_underline - strlen("map"); 192 if (str_start < tmp) { 193 continue; 194 } 195 196 if (sscanf(str_start, "map_%d", &huge_index) != 1) { 197 continue; 198 } 199 200 if (idx >= max) { 201 SPDK_ERRLOG("Exceed maximum of %d\n", max); 202 rc = -ENOSPC; 203 goto out; 204 } 205 206 if (idx > 0 && 207 strncmp(tmp, hugepages[idx - 1].path, PATH_MAX) == 0 && 208 v_start == hugepages[idx - 1].addr + hugepages[idx - 1].size) { 209 hugepages[idx - 1].size += (v_end - v_start); 210 continue; 211 } 212 213 hugepages[idx].addr = v_start; 214 hugepages[idx].size = v_end - v_start; 215 snprintf(hugepages[idx].path, PATH_MAX, "%s", tmp); 216 idx++; 217 } 218 219 rc = idx; 220 out: 221 fclose(f); 222 return rc; 223 } 224 225 static int 226 prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) 227 { 228 int i, num; 229 struct hugepage_file_info hugepages[VHOST_USER_MEMORY_MAX_NREGIONS]; 230 231 num = get_hugepage_file_info(hugepages, VHOST_USER_MEMORY_MAX_NREGIONS); 232 if (num < 0) { 233 SPDK_ERRLOG("Failed to prepare memory for vhost-user\n"); 234 return num; 235 } 236 237 for (i = 0; i < num; ++i) { 238 /* the memory regions are unaligned */ 239 msg->payload.memory.regions[i].guest_phys_addr = hugepages[i].addr; /* use vaddr! */ 240 msg->payload.memory.regions[i].userspace_addr = hugepages[i].addr; 241 msg->payload.memory.regions[i].memory_size = hugepages[i].size; 242 msg->payload.memory.regions[i].flags_padding = 0; 243 fds[i] = open(hugepages[i].path, O_RDWR); 244 } 245 246 msg->payload.memory.nregions = num; 247 msg->payload.memory.padding = 0; 248 249 return 0; 250 } 251 252 static const char *const vhost_msg_strings[VHOST_USER_MAX] = { 253 [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", 254 [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", 255 [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", 256 [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", 257 [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", 258 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", 259 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", 260 [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", 261 [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", 262 [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", 263 [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", 264 [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", 265 [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", 266 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", 267 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", 268 [VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG", 269 [VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG", 270 }; 271 272 static int 273 vhost_user_sock(struct virtio_user_dev *dev, 274 enum vhost_user_request req, 275 void *arg) 276 { 277 struct vhost_user_msg msg; 278 struct vhost_vring_file *file = 0; 279 int need_reply = 0; 280 int fds[VHOST_USER_MEMORY_MAX_NREGIONS]; 281 int fd_num = 0; 282 int i, len, rc; 283 int vhostfd = dev->vhostfd; 284 285 SPDK_DEBUGLOG(virtio_user, "sent message %d = %s\n", req, vhost_msg_strings[req]); 286 287 msg.request = req; 288 msg.flags = VHOST_USER_VERSION; 289 msg.size = 0; 290 291 switch (req) { 292 case VHOST_USER_GET_FEATURES: 293 case VHOST_USER_GET_PROTOCOL_FEATURES: 294 case VHOST_USER_GET_QUEUE_NUM: 295 need_reply = 1; 296 break; 297 298 case VHOST_USER_SET_FEATURES: 299 case VHOST_USER_SET_LOG_BASE: 300 case VHOST_USER_SET_PROTOCOL_FEATURES: 301 msg.payload.u64 = *((__u64 *)arg); 302 msg.size = sizeof(msg.payload.u64); 303 break; 304 305 case VHOST_USER_SET_OWNER: 306 case VHOST_USER_RESET_OWNER: 307 break; 308 309 case VHOST_USER_SET_MEM_TABLE: 310 rc = prepare_vhost_memory_user(&msg, fds); 311 if (rc < 0) { 312 return rc; 313 } 314 fd_num = msg.payload.memory.nregions; 315 msg.size = sizeof(msg.payload.memory.nregions); 316 msg.size += sizeof(msg.payload.memory.padding); 317 msg.size += fd_num * sizeof(struct vhost_memory_region); 318 break; 319 320 case VHOST_USER_SET_LOG_FD: 321 fds[fd_num++] = *((int *)arg); 322 break; 323 324 case VHOST_USER_SET_VRING_NUM: 325 case VHOST_USER_SET_VRING_BASE: 326 case VHOST_USER_SET_VRING_ENABLE: 327 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 328 msg.size = sizeof(msg.payload.state); 329 break; 330 331 case VHOST_USER_GET_VRING_BASE: 332 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 333 msg.size = sizeof(msg.payload.state); 334 need_reply = 1; 335 break; 336 337 case VHOST_USER_SET_VRING_ADDR: 338 memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); 339 msg.size = sizeof(msg.payload.addr); 340 break; 341 342 case VHOST_USER_SET_VRING_KICK: 343 case VHOST_USER_SET_VRING_CALL: 344 case VHOST_USER_SET_VRING_ERR: 345 file = arg; 346 msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 347 msg.size = sizeof(msg.payload.u64); 348 if (file->fd > 0) { 349 fds[fd_num++] = file->fd; 350 } else { 351 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 352 } 353 break; 354 355 case VHOST_USER_GET_CONFIG: 356 memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); 357 msg.size = sizeof(msg.payload.cfg); 358 need_reply = 1; 359 break; 360 361 case VHOST_USER_SET_CONFIG: 362 memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); 363 msg.size = sizeof(msg.payload.cfg); 364 break; 365 366 default: 367 SPDK_ERRLOG("trying to send unknown msg\n"); 368 return -EINVAL; 369 } 370 371 len = VHOST_USER_HDR_SIZE + msg.size; 372 rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num); 373 if (rc < 0) { 374 SPDK_ERRLOG("%s failed: %s\n", 375 vhost_msg_strings[req], spdk_strerror(-rc)); 376 return rc; 377 } 378 379 if (req == VHOST_USER_SET_MEM_TABLE) 380 for (i = 0; i < fd_num; ++i) { 381 close(fds[i]); 382 } 383 384 if (need_reply) { 385 rc = vhost_user_read(vhostfd, &msg); 386 if (rc < 0) { 387 SPDK_WARNLOG("Received msg failed: %s\n", spdk_strerror(-rc)); 388 return rc; 389 } 390 391 if (req != msg.request) { 392 SPDK_WARNLOG("Received unexpected msg type\n"); 393 return -EIO; 394 } 395 396 switch (req) { 397 case VHOST_USER_GET_FEATURES: 398 case VHOST_USER_GET_PROTOCOL_FEATURES: 399 case VHOST_USER_GET_QUEUE_NUM: 400 if (msg.size != sizeof(msg.payload.u64)) { 401 SPDK_WARNLOG("Received bad msg size\n"); 402 return -EIO; 403 } 404 *((__u64 *)arg) = msg.payload.u64; 405 break; 406 case VHOST_USER_GET_VRING_BASE: 407 if (msg.size != sizeof(msg.payload.state)) { 408 SPDK_WARNLOG("Received bad msg size\n"); 409 return -EIO; 410 } 411 memcpy(arg, &msg.payload.state, 412 sizeof(struct vhost_vring_state)); 413 break; 414 case VHOST_USER_GET_CONFIG: 415 if (msg.size != sizeof(msg.payload.cfg)) { 416 SPDK_WARNLOG("Received bad msg size\n"); 417 return -EIO; 418 } 419 memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg)); 420 break; 421 default: 422 SPDK_WARNLOG("Received unexpected msg type\n"); 423 return -EBADMSG; 424 } 425 } 426 427 return 0; 428 } 429 430 /** 431 * Set up environment to talk with a vhost user backend. 432 * 433 * @return 434 * - (-1) if fail; 435 * - (0) if succeed. 436 */ 437 static int 438 vhost_user_setup(struct virtio_user_dev *dev) 439 { 440 int fd; 441 int flag; 442 struct sockaddr_un un; 443 ssize_t rc; 444 445 fd = socket(AF_UNIX, SOCK_STREAM, 0); 446 if (fd < 0) { 447 SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno)); 448 return -errno; 449 } 450 451 flag = fcntl(fd, F_GETFD); 452 if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) { 453 SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno)); 454 } 455 456 memset(&un, 0, sizeof(un)); 457 un.sun_family = AF_UNIX; 458 rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); 459 if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) { 460 SPDK_ERRLOG("socket path too long\n"); 461 close(fd); 462 if (rc < 0) { 463 return -errno; 464 } else { 465 return -EINVAL; 466 } 467 } 468 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { 469 SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno)); 470 close(fd); 471 return -errno; 472 } 473 474 dev->vhostfd = fd; 475 return 0; 476 } 477 478 static int 479 virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel) 480 { 481 struct virtio_user_dev *dev = vdev->ctx; 482 483 /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come 484 * firstly because vhost depends on this msg to allocate virtqueue 485 * pair. 486 */ 487 struct vhost_vring_file file; 488 489 file.index = queue_sel; 490 file.fd = dev->callfds[queue_sel]; 491 return vhost_user_sock(dev, VHOST_USER_SET_VRING_CALL, &file); 492 } 493 494 static int 495 virtio_user_set_vring_addr(struct virtio_dev *vdev, uint32_t queue_sel) 496 { 497 struct virtio_user_dev *dev = vdev->ctx; 498 struct vring *vring = &dev->vrings[queue_sel]; 499 struct vhost_vring_addr addr = { 500 .index = queue_sel, 501 .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, 502 .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, 503 .used_user_addr = (uint64_t)(uintptr_t)vring->used, 504 .log_guest_addr = 0, 505 .flags = 0, /* disable log */ 506 }; 507 508 return vhost_user_sock(dev, VHOST_USER_SET_VRING_ADDR, &addr); 509 } 510 511 static int 512 virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel) 513 { 514 struct virtio_user_dev *dev = vdev->ctx; 515 struct vhost_vring_file file; 516 struct vhost_vring_state state; 517 struct vring *vring = &dev->vrings[queue_sel]; 518 int rc; 519 520 state.index = queue_sel; 521 state.num = vring->num; 522 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_NUM, &state); 523 if (rc < 0) { 524 return rc; 525 } 526 527 state.index = queue_sel; 528 state.num = 0; /* no reservation */ 529 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_BASE, &state); 530 if (rc < 0) { 531 return rc; 532 } 533 534 virtio_user_set_vring_addr(vdev, queue_sel); 535 536 /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes 537 * lastly because vhost depends on this msg to judge if 538 * virtio is ready. 539 */ 540 file.index = queue_sel; 541 file.fd = dev->kickfds[queue_sel]; 542 return vhost_user_sock(dev, VHOST_USER_SET_VRING_KICK, &file); 543 } 544 545 static int 546 virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel) 547 { 548 struct virtio_user_dev *dev = vdev->ctx; 549 struct vhost_vring_state state; 550 551 state.index = queue_sel; 552 state.num = 0; 553 554 return vhost_user_sock(dev, VHOST_USER_GET_VRING_BASE, &state); 555 } 556 557 static int 558 virtio_user_queue_setup(struct virtio_dev *vdev, 559 int (*fn)(struct virtio_dev *, uint32_t)) 560 { 561 uint32_t i; 562 int rc; 563 564 for (i = 0; i < vdev->max_queues; ++i) { 565 rc = fn(vdev, i); 566 if (rc < 0) { 567 SPDK_ERRLOG("setup tx vq fails: %"PRIu32".\n", i); 568 return rc; 569 } 570 } 571 572 return 0; 573 } 574 575 static int 576 virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map, 577 enum spdk_mem_map_notify_action action, 578 void *vaddr, size_t size) 579 { 580 struct virtio_dev *vdev = cb_ctx; 581 struct virtio_user_dev *dev = vdev->ctx; 582 uint64_t features; 583 int ret; 584 585 /* We have to resend all mappings anyway, so don't bother with any 586 * page tracking. 587 */ 588 ret = vhost_user_sock(dev, VHOST_USER_SET_MEM_TABLE, NULL); 589 if (ret < 0) { 590 return ret; 591 } 592 593 /* Since we might want to use that mapping straight away, we have to 594 * make sure the guest has already processed our SET_MEM_TABLE message. 595 * F_REPLY_ACK is just a feature and the host is not obliged to 596 * support it, so we send a simple message that always has a response 597 * and we wait for that response. Messages are always processed in order. 598 */ 599 return vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features); 600 } 601 602 static int 603 virtio_user_register_mem(struct virtio_dev *vdev) 604 { 605 struct virtio_user_dev *dev = vdev->ctx; 606 const struct spdk_mem_map_ops virtio_user_map_ops = { 607 .notify_cb = virtio_user_map_notify, 608 .are_contiguous = NULL 609 }; 610 611 dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev); 612 if (dev->mem_map == NULL) { 613 SPDK_ERRLOG("spdk_mem_map_alloc() failed\n"); 614 return -1; 615 } 616 617 return 0; 618 } 619 620 static void 621 virtio_user_unregister_mem(struct virtio_dev *vdev) 622 { 623 struct virtio_user_dev *dev = vdev->ctx; 624 625 spdk_mem_map_free(&dev->mem_map); 626 } 627 628 static int 629 virtio_user_start_device(struct virtio_dev *vdev) 630 { 631 struct virtio_user_dev *dev = vdev->ctx; 632 uint64_t host_max_queues; 633 int ret; 634 635 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 && 636 vdev->max_queues > 1 + vdev->fixed_queues_num) { 637 SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the " 638 "host doesn't support VHOST_USER_PROTOCOL_F_MQ. " 639 "Only one request queue will be used.\n", 640 vdev->name, vdev->max_queues - vdev->fixed_queues_num); 641 vdev->max_queues = 1 + vdev->fixed_queues_num; 642 } 643 644 /* negotiate the number of I/O queues. */ 645 ret = vhost_user_sock(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues); 646 if (ret < 0) { 647 return ret; 648 } 649 650 if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) { 651 SPDK_WARNLOG("%s: requested %"PRIu16" request queues" 652 "but only %"PRIu64" available\n", 653 vdev->name, vdev->max_queues - vdev->fixed_queues_num, 654 host_max_queues); 655 vdev->max_queues = host_max_queues; 656 } 657 658 /* tell vhost to create queues */ 659 ret = virtio_user_queue_setup(vdev, virtio_user_create_queue); 660 if (ret < 0) { 661 return ret; 662 } 663 664 ret = virtio_user_register_mem(vdev); 665 if (ret < 0) { 666 return ret; 667 } 668 669 return virtio_user_queue_setup(vdev, virtio_user_kick_queue); 670 } 671 672 static int 673 virtio_user_stop_device(struct virtio_dev *vdev) 674 { 675 int ret; 676 677 ret = virtio_user_queue_setup(vdev, virtio_user_stop_queue); 678 /* a queue might fail to stop for various reasons, e.g. socket 679 * connection going down, but this mustn't prevent us from freeing 680 * the mem map. 681 */ 682 virtio_user_unregister_mem(vdev); 683 return ret; 684 } 685 686 static int 687 virtio_user_dev_setup(struct virtio_dev *vdev) 688 { 689 struct virtio_user_dev *dev = vdev->ctx; 690 uint16_t i; 691 692 dev->vhostfd = -1; 693 694 for (i = 0; i < SPDK_VIRTIO_MAX_VIRTQUEUES; ++i) { 695 dev->callfds[i] = -1; 696 dev->kickfds[i] = -1; 697 } 698 699 return vhost_user_setup(dev); 700 } 701 702 static int 703 virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset, 704 void *dst, int length) 705 { 706 struct virtio_user_dev *dev = vdev->ctx; 707 struct vhost_user_config cfg = {0}; 708 int rc; 709 710 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { 711 return -ENOTSUP; 712 } 713 714 cfg.offset = 0; 715 cfg.size = VHOST_USER_MAX_CONFIG_SIZE; 716 717 rc = vhost_user_sock(dev, VHOST_USER_GET_CONFIG, &cfg); 718 if (rc < 0) { 719 SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc)); 720 return rc; 721 } 722 723 memcpy(dst, cfg.region + offset, length); 724 return 0; 725 } 726 727 static int 728 virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset, 729 const void *src, int length) 730 { 731 struct virtio_user_dev *dev = vdev->ctx; 732 struct vhost_user_config cfg = {0}; 733 int rc; 734 735 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { 736 return -ENOTSUP; 737 } 738 739 cfg.offset = offset; 740 cfg.size = length; 741 memcpy(cfg.region, src, length); 742 743 rc = vhost_user_sock(dev, VHOST_USER_SET_CONFIG, &cfg); 744 if (rc < 0) { 745 SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc)); 746 return rc; 747 } 748 749 return 0; 750 } 751 752 static void 753 virtio_user_set_status(struct virtio_dev *vdev, uint8_t status) 754 { 755 struct virtio_user_dev *dev = vdev->ctx; 756 int rc = 0; 757 758 if ((dev->status & VIRTIO_CONFIG_S_NEEDS_RESET) && 759 status != VIRTIO_CONFIG_S_RESET) { 760 rc = -1; 761 } else if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 762 rc = virtio_user_start_device(vdev); 763 } else if (status == VIRTIO_CONFIG_S_RESET && 764 (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 765 rc = virtio_user_stop_device(vdev); 766 } 767 768 if (rc != 0) { 769 dev->status |= VIRTIO_CONFIG_S_NEEDS_RESET; 770 } else { 771 dev->status = status; 772 } 773 } 774 775 static uint8_t 776 virtio_user_get_status(struct virtio_dev *vdev) 777 { 778 struct virtio_user_dev *dev = vdev->ctx; 779 780 return dev->status; 781 } 782 783 static uint64_t 784 virtio_user_get_features(struct virtio_dev *vdev) 785 { 786 struct virtio_user_dev *dev = vdev->ctx; 787 uint64_t features; 788 int rc; 789 790 rc = vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features); 791 if (rc < 0) { 792 SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc)); 793 return 0; 794 } 795 796 return features; 797 } 798 799 static int 800 virtio_user_set_features(struct virtio_dev *vdev, uint64_t features) 801 { 802 struct virtio_user_dev *dev = vdev->ctx; 803 uint64_t protocol_features; 804 int ret; 805 806 ret = vhost_user_sock(dev, VHOST_USER_SET_FEATURES, &features); 807 if (ret < 0) { 808 return ret; 809 } 810 811 vdev->negotiated_features = features; 812 vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1); 813 814 if (!virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { 815 /* nothing else to do */ 816 return 0; 817 } 818 819 ret = vhost_user_sock(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); 820 if (ret < 0) { 821 return ret; 822 } 823 824 protocol_features &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES; 825 ret = vhost_user_sock(dev, VHOST_USER_SET_PROTOCOL_FEATURES, &protocol_features); 826 if (ret < 0) { 827 return ret; 828 } 829 830 dev->protocol_features = protocol_features; 831 return 0; 832 } 833 834 static uint16_t 835 virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id) 836 { 837 struct virtio_user_dev *dev = vdev->ctx; 838 839 /* Currently each queue has same queue size */ 840 return dev->queue_size; 841 } 842 843 static int 844 virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq) 845 { 846 struct virtio_user_dev *dev = vdev->ctx; 847 struct vhost_vring_state state; 848 uint16_t queue_idx = vq->vq_queue_index; 849 void *queue_mem; 850 uint64_t desc_addr, avail_addr, used_addr; 851 int callfd, kickfd, rc; 852 853 if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) { 854 SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx); 855 return -EEXIST; 856 } 857 858 /* May use invalid flag, but some backend uses kickfd and 859 * callfd as criteria to judge if dev is alive. so finally we 860 * use real event_fd. 861 */ 862 callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); 863 if (callfd < 0) { 864 SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno)); 865 return -errno; 866 } 867 868 kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); 869 if (kickfd < 0) { 870 SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno)); 871 close(callfd); 872 return -errno; 873 } 874 875 queue_mem = spdk_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL, 876 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 877 if (queue_mem == NULL) { 878 close(kickfd); 879 close(callfd); 880 return -ENOMEM; 881 } 882 883 vq->vq_ring_mem = SPDK_VTOPHYS_ERROR; 884 vq->vq_ring_virt_mem = queue_mem; 885 886 state.index = vq->vq_queue_index; 887 state.num = vq->vq_nentries; 888 889 if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { 890 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state); 891 if (rc < 0) { 892 SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n", 893 spdk_strerror(-rc)); 894 close(kickfd); 895 close(callfd); 896 spdk_free(queue_mem); 897 return -rc; 898 } 899 } 900 901 dev->callfds[queue_idx] = callfd; 902 dev->kickfds[queue_idx] = kickfd; 903 904 desc_addr = (uintptr_t)vq->vq_ring_virt_mem; 905 avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); 906 used_addr = SPDK_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, 907 ring[vq->vq_nentries]), 908 VIRTIO_PCI_VRING_ALIGN); 909 910 dev->vrings[queue_idx].num = vq->vq_nentries; 911 dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; 912 dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; 913 dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; 914 915 return 0; 916 } 917 918 static void 919 virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq) 920 { 921 /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU 922 * correspondingly stops the ioeventfds, and reset the status of 923 * the device. 924 * For modern devices, set queue desc, avail, used in PCI bar to 0, 925 * not see any more behavior in QEMU. 926 * 927 * Here we just care about what information to deliver to vhost-user. 928 * So we just close ioeventfd for now. 929 */ 930 struct virtio_user_dev *dev = vdev->ctx; 931 932 close(dev->callfds[vq->vq_queue_index]); 933 close(dev->kickfds[vq->vq_queue_index]); 934 dev->callfds[vq->vq_queue_index] = -1; 935 dev->kickfds[vq->vq_queue_index] = -1; 936 937 spdk_free(vq->vq_ring_virt_mem); 938 } 939 940 static void 941 virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq) 942 { 943 uint64_t buf = 1; 944 struct virtio_user_dev *dev = vdev->ctx; 945 946 if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) { 947 SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno)); 948 } 949 } 950 951 static void 952 virtio_user_destroy(struct virtio_dev *vdev) 953 { 954 struct virtio_user_dev *dev = vdev->ctx; 955 956 close(dev->vhostfd); 957 free(dev); 958 } 959 960 static void 961 virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) 962 { 963 struct virtio_user_dev *dev = vdev->ctx; 964 965 spdk_json_write_named_string(w, "type", "user"); 966 spdk_json_write_named_string(w, "socket", dev->path); 967 } 968 969 static void 970 virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) 971 { 972 struct virtio_user_dev *dev = vdev->ctx; 973 974 spdk_json_write_named_string(w, "trtype", "user"); 975 spdk_json_write_named_string(w, "traddr", dev->path); 976 spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num); 977 spdk_json_write_named_uint32(w, "vq_size", virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0)); 978 } 979 980 static const struct virtio_dev_ops virtio_user_ops = { 981 .read_dev_cfg = virtio_user_read_dev_config, 982 .write_dev_cfg = virtio_user_write_dev_config, 983 .get_status = virtio_user_get_status, 984 .set_status = virtio_user_set_status, 985 .get_features = virtio_user_get_features, 986 .set_features = virtio_user_set_features, 987 .destruct_dev = virtio_user_destroy, 988 .get_queue_size = virtio_user_get_queue_size, 989 .setup_queue = virtio_user_setup_queue, 990 .del_queue = virtio_user_del_queue, 991 .notify_queue = virtio_user_notify_queue, 992 .dump_json_info = virtio_user_dump_json_info, 993 .write_json_config = virtio_user_write_json_config, 994 }; 995 996 int 997 virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path, 998 uint32_t queue_size) 999 { 1000 struct virtio_user_dev *dev; 1001 int rc; 1002 1003 if (name == NULL) { 1004 SPDK_ERRLOG("No name gived for controller: %s\n", path); 1005 return -EINVAL; 1006 } 1007 1008 dev = calloc(1, sizeof(*dev)); 1009 if (dev == NULL) { 1010 return -ENOMEM; 1011 } 1012 1013 rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev); 1014 if (rc != 0) { 1015 SPDK_ERRLOG("Failed to init device: %s\n", path); 1016 free(dev); 1017 return rc; 1018 } 1019 1020 vdev->is_hw = 0; 1021 1022 snprintf(dev->path, PATH_MAX, "%s", path); 1023 dev->queue_size = queue_size; 1024 1025 rc = virtio_user_dev_setup(vdev); 1026 if (rc < 0) { 1027 SPDK_ERRLOG("backend set up fails\n"); 1028 goto err; 1029 } 1030 1031 rc = vhost_user_sock(dev, VHOST_USER_SET_OWNER, NULL); 1032 if (rc < 0) { 1033 SPDK_ERRLOG("set_owner fails: %s\n", spdk_strerror(-rc)); 1034 goto err; 1035 } 1036 1037 return 0; 1038 1039 err: 1040 virtio_dev_destruct(vdev); 1041 return rc; 1042 } 1043 SPDK_LOG_REGISTER_COMPONENT(virtio_user) 1044