/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include <sys/eventfd.h>

#include "spdk/string.h"
#include "spdk/config.h"
#include "spdk/util.h"

#include "spdk_internal/virtio.h"
#include "spdk_internal/vhost_user.h"

/* The version of the vhost-user protocol we support */
#define VHOST_USER_VERSION 0x1

#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \
	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
	 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))

struct virtio_user_backend_ops;

struct virtio_user_dev {
	int vhostfd;

	int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
	int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES];
	uint32_t queue_size;

	uint8_t status;
	char path[PATH_MAX];
	uint64_t protocol_features;
	struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES];
	struct virtio_user_backend_ops *ops;
	struct spdk_mem_map *mem_map;
};

struct virtio_user_backend_ops {
	int (*setup)(struct virtio_user_dev *dev);
	int (*send_request)(struct virtio_user_dev *dev,
			    enum vhost_user_request req,
			    void *arg);
};

static int
vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
{
	int r;
	struct msghdr msgh;
	struct iovec iov;
	size_t fd_size = fd_num * sizeof(int);
	char control[CMSG_SPACE(fd_size)];
	struct cmsghdr *cmsg;

	memset(&msgh, 0, sizeof(msgh));
	memset(control, 0, sizeof(control));

	iov.iov_base = (uint8_t *)buf;
	iov.iov_len = len;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_len = CMSG_LEN(fd_size);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fd_size);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		r = sendmsg(fd, &msgh, 0);
	} while (r < 0 && errno == EINTR);

	if (r == -1) {
		return -errno;
	}

	return 0;
}

static int
vhost_user_read(int fd, struct vhost_user_msg *msg)
{
	uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
	ssize_t ret;
	size_t sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;

	ret = recv(fd, (void *)msg, sz_hdr, 0);
	if ((size_t)ret != sz_hdr) {
		SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n",
			     ret, sz_hdr);
		if (ret == -1) {
			return -errno;
		} else {
			return -EBUSY;
		}
	}

	/* validate msg flags */
	if (msg->flags != valid_flags) {
		SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n",
			     msg->flags, valid_flags);
		return -EIO;
	}

	sz_payload = msg->size;

	if (sz_payload > VHOST_USER_PAYLOAD_SIZE) {
		SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n",
			     sz_payload, VHOST_USER_PAYLOAD_SIZE);
		return -EIO;
	}

	if (sz_payload) {
		ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
		if ((size_t)ret != sz_payload) {
			SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n",
				     ret, msg->size);
			if (ret == -1) {
				return -errno;
			} else {
				return -EBUSY;
			}
		}
	}

	return 0;
}
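
/*
 * Example (illustrative): the receiving side of the SCM_RIGHTS transfer
 * performed by vhost_user_write() above. This sketches what a vhost-user
 * backend would do to accept one descriptor; it is not code used by this
 * driver:
 *
 *	struct vhost_user_msg msg;
 *	struct iovec iov = { .iov_base = &msg, .iov_len = VHOST_USER_HDR_SIZE };
 *	char control[CMSG_SPACE(sizeof(int))];
 *	struct msghdr msgh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			       .msg_control = control,
 *			       .msg_controllen = sizeof(control) };
 *	struct cmsghdr *cmsg;
 *	int fd = -1;
 *
 *	recvmsg(sock, &msgh, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
 *	     cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_RIGHTS) {
 *			memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 *		}
 *	}
 */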

struct hugepage_file_info {
	uint64_t addr;       /**< virtual addr */
	size_t size;         /**< the file size */
	char path[PATH_MAX]; /**< path to backing file */
};

/* Two possible options:
 * 1. Match HUGEPAGE_INFO_FMT to find the file storing the struct
 *    hugepage_file array. This is simple but cannot be used in a
 *    secondary process, because the secondary process will close and
 *    munmap that file.
 * 2. Match HUGEFILE_FMT to find hugepage files directly.
 *
 * We choose option 2.
 */
static int
get_hugepage_file_info(struct hugepage_file_info hugepages[], int max)
{
	int idx, rc;
	FILE *f;
	char buf[BUFSIZ], *tmp, *tail;
	char *str_underline, *str_start;
	int huge_index;
	uint64_t v_start, v_end;

	f = fopen("/proc/self/maps", "r");
	if (!f) {
		SPDK_ERRLOG("cannot open /proc/self/maps\n");
		rc = -errno;
		assert(rc < 0); /* scan-build hack */
		return rc;
	}

	idx = 0;
	while (fgets(buf, sizeof(buf), f) != NULL) {
		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
			SPDK_ERRLOG("Failed to parse address\n");
			rc = -EIO;
			goto out;
		}

		tmp = strchr(buf, ' ') + 1; /** skip address */
		tmp = strchr(tmp, ' ') + 1; /** skip perm */
		tmp = strchr(tmp, ' ') + 1; /** skip offset */
		tmp = strchr(tmp, ' ') + 1; /** skip dev */
		tmp = strchr(tmp, ' ') + 1; /** skip inode */
		while (*tmp == ' ') {       /** skip spaces */
			tmp++;
		}
		tail = strrchr(tmp, '\n');  /** remove newline if exists */
		if (tail) {
			*tail = '\0';
		}

		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
		 * which is defined in eal_filesystem.h
		 */
		str_underline = strrchr(tmp, '_');
		if (!str_underline) {
			continue;
		}

		str_start = str_underline - strlen("map");
		if (str_start < tmp) {
			continue;
		}

		if (sscanf(str_start, "map_%d", &huge_index) != 1) {
			continue;
		}

		if (idx >= max) {
			SPDK_ERRLOG("Exceeded the maximum of %d hugepage regions\n", max);
			rc = -ENOSPC;
			goto out;
		}

		/* coalesce virtually contiguous mappings of the same file */
		if (idx > 0 &&
		    strncmp(tmp, hugepages[idx - 1].path, PATH_MAX) == 0 &&
		    v_start == hugepages[idx - 1].addr + hugepages[idx - 1].size) {
			hugepages[idx - 1].size += (v_end - v_start);
			continue;
		}

		hugepages[idx].addr = v_start;
		hugepages[idx].size = v_end - v_start;
		snprintf(hugepages[idx].path, PATH_MAX, "%s", tmp);
		idx++;
	}

	rc = idx;
out:
	fclose(f);
	return rc;
}
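
/*
 * Example (illustrative): a hugepage mapping in /proc/self/maps looks
 * roughly like the line below (fields: address range, perms, offset,
 * dev, inode, pathname); the parser above keeps only entries whose
 * pathname ends in "map_<N>". The path shown is a placeholder:
 *
 *	7f2c40000000-7f2c80000000 rw-s 00000000 00:0f 77 /dev/hugepages/spdk0map_0
 *
 * Virtually contiguous entries backed by the same file are coalesced
 * into a single region before being reported to the caller.
 */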

static int
prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
{
	int i, num;
	struct hugepage_file_info hugepages[VHOST_USER_MEMORY_MAX_NREGIONS];

	num = get_hugepage_file_info(hugepages, VHOST_USER_MEMORY_MAX_NREGIONS);
	if (num < 0) {
		SPDK_ERRLOG("Failed to prepare memory for vhost-user\n");
		return num;
	}

	for (i = 0; i < num; ++i) {
		/* the memory regions are unaligned */
		msg->payload.memory.regions[i].guest_phys_addr = hugepages[i].addr; /* use vaddr! */
		msg->payload.memory.regions[i].userspace_addr = hugepages[i].addr;
		msg->payload.memory.regions[i].memory_size = hugepages[i].size;
		msg->payload.memory.regions[i].flags_padding = 0;
		fds[i] = open(hugepages[i].path, O_RDWR);
		if (fds[i] < 0) {
			int rc = -errno;

			SPDK_ERRLOG("Failed to open %s: %s\n",
				    hugepages[i].path, spdk_strerror(-rc));
			/* close the fds opened so far */
			while (--i >= 0) {
				close(fds[i]);
			}
			return rc;
		}
	}

	msg->payload.memory.nregions = num;
	msg->payload.memory.padding = 0;

	return 0;
}

static const char *const vhost_msg_strings[VHOST_USER_MAX] = {
	[VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER",
	[VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES",
	[VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL",
	[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE",
	[VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE",
	[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
	[VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG",
	[VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG",
};

static int
vhost_user_sock(struct virtio_user_dev *dev,
		enum vhost_user_request req,
		void *arg)
{
	struct vhost_user_msg msg;
	struct vhost_vring_file *file = NULL;
	int need_reply = 0;
	int fds[VHOST_USER_MEMORY_MAX_NREGIONS];
	int fd_num = 0;
	int i, len, rc;
	int vhostfd = dev->vhostfd;

	SPDK_DEBUGLOG(virtio_user, "sending message %d = %s\n", req, vhost_msg_strings[req]);

	msg.request = req;
	msg.flags = VHOST_USER_VERSION;
	msg.size = 0;

	switch (req) {
	case VHOST_USER_GET_FEATURES:
	case VHOST_USER_GET_PROTOCOL_FEATURES:
	case VHOST_USER_GET_QUEUE_NUM:
		need_reply = 1;
		break;

	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_LOG_BASE:
	case VHOST_USER_SET_PROTOCOL_FEATURES:
		msg.payload.u64 = *((__u64 *)arg);
		msg.size = sizeof(msg.payload.u64);
		break;

	case VHOST_USER_SET_OWNER:
	case VHOST_USER_RESET_OWNER:
		break;

	case VHOST_USER_SET_MEM_TABLE:
		rc = prepare_vhost_memory_user(&msg, fds);
		if (rc < 0) {
			return rc;
		}
		fd_num = msg.payload.memory.nregions;
		msg.size = sizeof(msg.payload.memory.nregions);
		msg.size += sizeof(msg.payload.memory.padding);
		msg.size += fd_num * sizeof(struct vhost_memory_region);
		break;

	case VHOST_USER_SET_LOG_FD:
		fds[fd_num++] = *((int *)arg);
		break;

	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(msg.payload.state);
		break;

	case VHOST_USER_GET_VRING_BASE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(msg.payload.state);
		need_reply = 1;
		break;

	case VHOST_USER_SET_VRING_ADDR:
		memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
		msg.size = sizeof(msg.payload.addr);
		break;

	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
		file = arg;
		msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
		msg.size = sizeof(msg.payload.u64);
		if (file->fd > 0) {
			fds[fd_num++] = file->fd;
		} else {
			msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
		}
		break;

	case VHOST_USER_GET_CONFIG:
		memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
		msg.size = sizeof(msg.payload.cfg);
		need_reply = 1;
		break;

	case VHOST_USER_SET_CONFIG:
		memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg));
		msg.size = sizeof(msg.payload.cfg);
		break;

	default:
		SPDK_ERRLOG("trying to send unknown msg\n");
		return -EINVAL;
	}

	len = VHOST_USER_HDR_SIZE + msg.size;
	rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num);
	if (rc < 0) {
		SPDK_ERRLOG("%s failed: %s\n",
			    vhost_msg_strings[req], spdk_strerror(-rc));
		return rc;
	}

	if (req == VHOST_USER_SET_MEM_TABLE) {
		for (i = 0; i < fd_num; ++i) {
			close(fds[i]);
		}
	}

	if (need_reply) {
		rc = vhost_user_read(vhostfd, &msg);
		if (rc < 0) {
			SPDK_WARNLOG("Failed to receive reply: %s\n", spdk_strerror(-rc));
			return rc;
		}

		if (req != msg.request) {
			SPDK_WARNLOG("Received unexpected msg type\n");
			return -EIO;
		}

		switch (req) {
		case VHOST_USER_GET_FEATURES:
		case VHOST_USER_GET_PROTOCOL_FEATURES:
		case VHOST_USER_GET_QUEUE_NUM:
			if (msg.size != sizeof(msg.payload.u64)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			*((__u64 *)arg) = msg.payload.u64;
			break;
		case VHOST_USER_GET_VRING_BASE:
			if (msg.size != sizeof(msg.payload.state)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			memcpy(arg, &msg.payload.state,
			       sizeof(struct vhost_vring_state));
			break;
		case VHOST_USER_GET_CONFIG:
			if (msg.size != sizeof(msg.payload.cfg)) {
				SPDK_WARNLOG("Received bad msg size\n");
				return -EIO;
			}
			memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg));
			break;
		default:
			SPDK_WARNLOG("Received unexpected msg type\n");
			return -EBADMSG;
		}
	}

	return 0;
}
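
/*
 * Example (illustrative): a GET/SET round trip through vhost_user_sock()
 * via the ops table. SUPPORTED_FEATURES is a hypothetical driver mask;
 * the real negotiation lives in virtio_user_set_features() below:
 *
 *	uint64_t features;
 *
 *	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features) == 0) {
 *		features &= SUPPORTED_FEATURES;   (hypothetical mask)
 *		dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
 *	}
 */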

/**
 * Set up environment to talk with a vhost-user backend.
 *
 * @return
 *   - (0) if succeed;
 *   - negative errno if fail.
 */
static int
vhost_user_setup(struct virtio_user_dev *dev)
{
	int fd;
	int flag;
	struct sockaddr_un un;
	ssize_t rc;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno));
		return -errno;
	}

	flag = fcntl(fd, F_GETFD);
	if (flag < 0 || fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) {
		SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno));
	}

	memset(&un, 0, sizeof(un));
	un.sun_family = AF_UNIX;
	rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
	if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) {
		SPDK_ERRLOG("socket path too long\n");
		close(fd);
		if (rc < 0) {
			return -errno;
		} else {
			return -EINVAL;
		}
	}
	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
		SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno));
		close(fd);
		return -errno;
	}

	dev->vhostfd = fd;
	return 0;
}

struct virtio_user_backend_ops ops_user = {
	.setup = vhost_user_setup,
	.send_request = vhost_user_sock,
};
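
/*
 * Example (illustrative): minimal bring-up through ops_user, mirroring
 * what virtio_user_dev_setup() and virtio_user_dev_init() do at the
 * bottom of this file; the socket path is a placeholder:
 *
 *	snprintf(dev->path, sizeof(dev->path), "%s", "/var/tmp/vhost.0");
 *	dev->ops = &ops_user;
 *
 *	if (dev->ops->setup(dev) == 0) {
 *		dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
 *	}
 */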

static int
virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
	struct virtio_user_dev *dev = vdev->ctx;

	/* Of all the per-virtqueue messages, make sure VHOST_USER_SET_VRING_CALL
	 * comes first, because vhost depends on this message to allocate the
	 * virtqueue pair.
	 */
	struct vhost_vring_file file;

	file.index = queue_sel;
	file.fd = dev->callfds[queue_sel];
	return dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file);
}

static int
virtio_user_set_vring_addr(struct virtio_dev *vdev, uint32_t queue_sel)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vring *vring = &dev->vrings[queue_sel];
	struct vhost_vring_addr addr = {
		.index = queue_sel,
		.desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
		.avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
		.used_user_addr = (uint64_t)(uintptr_t)vring->used,
		.log_guest_addr = 0,
		.flags = 0, /* disable log */
	};

	return dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr);
}

static int
virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_vring_file file;
	struct vhost_vring_state state;
	struct vring *vring = &dev->vrings[queue_sel];
	int rc;

	state.index = queue_sel;
	state.num = vring->num;
	rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state);
	if (rc < 0) {
		return rc;
	}

	state.index = queue_sel;
	state.num = 0; /* no reservation */
	rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state);
	if (rc < 0) {
		return rc;
	}

	rc = virtio_user_set_vring_addr(vdev, queue_sel);
	if (rc < 0) {
		return rc;
	}

	/* Of all the per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK
	 * comes last, because vhost depends on this message to judge if
	 * virtio is ready.
	 */
	file.index = queue_sel;
	file.fd = dev->kickfds[queue_sel];
	return dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file);
}
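
/*
 * Example (illustrative): the per-queue bring-up order enforced by the
 * two comments above, shown for queue 0:
 *
 *	virtio_user_create_queue(vdev, 0);	sends SET_VRING_CALL
 *	virtio_user_kick_queue(vdev, 0);	sends SET_VRING_NUM, SET_VRING_BASE,
 *						SET_VRING_ADDR, then SET_VRING_KICK
 *
 * virtio_user_start_device() below runs exactly this sequence, once per
 * queue, through virtio_user_queue_setup().
 */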

static int
virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_vring_state state;

	state.index = queue_sel;
	state.num = 0;

	return dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE, &state);
}

static int
virtio_user_queue_setup(struct virtio_dev *vdev,
			int (*fn)(struct virtio_dev *, uint32_t))
{
	uint32_t i;
	int rc;

	for (i = 0; i < vdev->max_queues; ++i) {
		rc = fn(vdev, i);
		if (rc < 0) {
			SPDK_ERRLOG("setup of virtqueue %"PRIu32" failed.\n", i);
			return rc;
		}
	}

	return 0;
}

static int
virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map,
		       enum spdk_mem_map_notify_action action,
		       void *vaddr, size_t size)
{
	struct virtio_dev *vdev = cb_ctx;
	struct virtio_user_dev *dev = vdev->ctx;
	uint64_t features;
	int ret;

	/* We have to resend all mappings anyway, so don't bother with any
	 * page tracking.
	 */
	ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
	if (ret < 0) {
		return ret;
	}

	/* Since we might want to use that mapping straight away, we have to
	 * make sure the guest has already processed our SET_MEM_TABLE message.
	 * F_REPLY_ACK is just a feature and the host is not obliged to
	 * support it, so we send a simple message that always has a response
	 * and we wait for that response. Messages are always processed in order.
	 */
	return dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features);
}

static int
virtio_user_register_mem(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;
	const struct spdk_mem_map_ops virtio_user_map_ops = {
		.notify_cb = virtio_user_map_notify,
		.are_contiguous = NULL
	};

	dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev);
	if (dev->mem_map == NULL) {
		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
		return -1;
	}

	return 0;
}

static void
virtio_user_unregister_mem(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;

	spdk_mem_map_free(&dev->mem_map);
}
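
/*
 * Example (illustrative): with the map allocated by
 * virtio_user_register_mem(), any later DMA-capable allocation in the
 * process is registered with the mem map framework and lands in
 * virtio_user_map_notify(), which resends the whole memory table:
 *
 *	void *buf = spdk_zmalloc(0x200000, 0x1000, NULL,
 *				 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
 *
 * The GET_FEATURES request at the end of the notify callback acts as a
 * barrier: once it is answered, SET_MEM_TABLE has been processed too.
 */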

static int
virtio_user_start_device(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;
	uint64_t host_max_queues;
	int ret;

	if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 &&
	    vdev->max_queues > 1 + vdev->fixed_queues_num) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the "
			     "host doesn't support VHOST_USER_PROTOCOL_F_MQ. "
			     "Only one request queue will be used.\n",
			     vdev->name, vdev->max_queues - vdev->fixed_queues_num);
		vdev->max_queues = 1 + vdev->fixed_queues_num;
	}

	/* negotiate the number of I/O queues. */
	ret = dev->ops->send_request(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues);
	if (ret < 0) {
		return ret;
	}

	if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) {
		SPDK_WARNLOG("%s: requested %"PRIu16" request queues, "
			     "but only %"PRIu64" are available\n",
			     vdev->name, vdev->max_queues - vdev->fixed_queues_num,
			     host_max_queues);
		vdev->max_queues = host_max_queues;
	}

	/* tell vhost to create queues */
	ret = virtio_user_queue_setup(vdev, virtio_user_create_queue);
	if (ret < 0) {
		return ret;
	}

	ret = virtio_user_register_mem(vdev);
	if (ret < 0) {
		return ret;
	}

	return virtio_user_queue_setup(vdev, virtio_user_kick_queue);
}

static int
virtio_user_stop_device(struct virtio_dev *vdev)
{
	int ret;

	ret = virtio_user_queue_setup(vdev, virtio_user_stop_queue);
	/* a queue might fail to stop for various reasons, e.g. socket
	 * connection going down, but this mustn't prevent us from freeing
	 * the mem map.
	 */
	virtio_user_unregister_mem(vdev);
	return ret;
}

static int
virtio_user_dev_setup(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;
	uint16_t i;

	dev->vhostfd = -1;

	for (i = 0; i < SPDK_VIRTIO_MAX_VIRTQUEUES; ++i) {
		dev->callfds[i] = -1;
		dev->kickfds[i] = -1;
	}

	dev->ops = &ops_user;

	return dev->ops->setup(dev);
}

static int
virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset,
			    void *dst, int length)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_user_config cfg = {0};
	int rc;

	if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
		return -ENOTSUP;
	}

	/* don't read past the end of the config region */
	if (length < 0 || offset + (size_t)length > VHOST_USER_MAX_CONFIG_SIZE) {
		return -EINVAL;
	}

	cfg.offset = 0;
	cfg.size = VHOST_USER_MAX_CONFIG_SIZE;

	rc = dev->ops->send_request(dev, VHOST_USER_GET_CONFIG, &cfg);
	if (rc < 0) {
		SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc));
		return rc;
	}

	memcpy(dst, cfg.region + offset, length);
	return 0;
}

static int
virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset,
			     const void *src, int length)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_user_config cfg = {0};
	int rc;

	if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) {
		return -ENOTSUP;
	}

	/* don't write past the end of the config region */
	if (length < 0 || offset + (size_t)length > VHOST_USER_MAX_CONFIG_SIZE) {
		return -EINVAL;
	}

	cfg.offset = offset;
	cfg.size = length;
	memcpy(cfg.region, src, length);

	rc = dev->ops->send_request(dev, VHOST_USER_SET_CONFIG, &cfg);
	if (rc < 0) {
		SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc));
		return rc;
	}

	return 0;
}

static void
virtio_user_set_status(struct virtio_dev *vdev, uint8_t status)
{
	struct virtio_user_dev *dev = vdev->ctx;
	int rc = 0;

	if ((dev->status & VIRTIO_CONFIG_S_NEEDS_RESET) &&
	    status != VIRTIO_CONFIG_S_RESET) {
		rc = -1;
	} else if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
		rc = virtio_user_start_device(vdev);
	} else if (status == VIRTIO_CONFIG_S_RESET &&
		   (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
		rc = virtio_user_stop_device(vdev);
	}

	if (rc != 0) {
		dev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
	} else {
		dev->status = status;
	}
}

static uint8_t
virtio_user_get_status(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;

	return dev->status;
}
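
/*
 * Example (illustrative): reading a config field through the
 * VHOST_USER_GET_CONFIG path above. Offset 0 and the field name follow
 * the virtio-blk config layout ("capacity", in 512-byte sectors) and
 * are here only for illustration:
 *
 *	uint64_t capacity;
 *
 *	if (virtio_user_read_dev_config(vdev, 0, &capacity, sizeof(capacity)) == 0) {
 *		SPDK_NOTICELOG("capacity: %"PRIu64" sectors\n", capacity);
 *	}
 */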

static uint64_t
virtio_user_get_features(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;
	uint64_t features;
	int rc;

	rc = dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features);
	if (rc < 0) {
		SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc));
		return 0;
	}

	return features;
}

static int
virtio_user_set_features(struct virtio_dev *vdev, uint64_t features)
{
	struct virtio_user_dev *dev = vdev->ctx;
	uint64_t protocol_features;
	int ret;

	ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
	if (ret < 0) {
		return ret;
	}

	vdev->negotiated_features = features;
	vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1);

	if (!virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
		/* nothing else to do */
		return 0;
	}

	ret = dev->ops->send_request(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features);
	if (ret < 0) {
		return ret;
	}

	protocol_features &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES;
	ret = dev->ops->send_request(dev, VHOST_USER_SET_PROTOCOL_FEATURES, &protocol_features);
	if (ret < 0) {
		return ret;
	}

	dev->protocol_features = protocol_features;
	return 0;
}

static uint16_t
virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id)
{
	struct virtio_user_dev *dev = vdev->ctx;

	/* Currently each queue has same queue size */
	return dev->queue_size;
}
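
/*
 * Example (illustrative): the negotiation order implemented by
 * virtio_user_get_features()/virtio_user_set_features() above; the
 * driver mask is a placeholder:
 *
 *	uint64_t features = virtio_user_get_features(vdev);
 *
 *	features &= DRIVER_SUPPORTED_FEATURES;   (hypothetical mask)
 *	virtio_user_set_features(vdev, features);
 *
 * If VHOST_USER_F_PROTOCOL_FEATURES was negotiated, set_features also
 * exchanges GET/SET_PROTOCOL_FEATURES, masked by
 * VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES.
 */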

static int
virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
	struct virtio_user_dev *dev = vdev->ctx;
	struct vhost_vring_state state;
	uint16_t queue_idx = vq->vq_queue_index;
	void *queue_mem;
	uint64_t desc_addr, avail_addr, used_addr;
	int callfd, kickfd, rc;

	if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) {
		SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx);
		return -EEXIST;
	}

	/* We could pass an invalid (-1) fd here, but some backends use the
	 * kickfd and callfd as criteria to judge whether the device is
	 * alive, so we use real eventfds.
	 */
	callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
	if (callfd < 0) {
		SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno));
		return -errno;
	}

	kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
	if (kickfd < 0) {
		SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno));
		close(callfd);
		return -errno;
	}

	queue_mem = spdk_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL,
				 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (queue_mem == NULL) {
		close(kickfd);
		close(callfd);
		return -ENOMEM;
	}

	vq->vq_ring_mem = SPDK_VTOPHYS_ERROR;
	vq->vq_ring_virt_mem = queue_mem;

	state.index = vq->vq_queue_index;
	state.num = vq->vq_nentries;

	if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_ENABLE, &state);
		if (rc < 0) {
			SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n",
				    spdk_strerror(-rc));
			close(kickfd);
			close(callfd);
			spdk_free(queue_mem);
			return rc;
		}
	}

	dev->callfds[queue_idx] = callfd;
	dev->kickfds[queue_idx] = kickfd;

	desc_addr = (uintptr_t)vq->vq_ring_virt_mem;
	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
	used_addr = SPDK_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
							  ring[vq->vq_nentries]),
				    VIRTIO_PCI_VRING_ALIGN);

	dev->vrings[queue_idx].num = vq->vq_nentries;
	dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr;
	dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr;
	dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr;

	return 0;
}

static void
virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
	/* For legacy devices, writing 0 to the VIRTIO_PCI_QUEUE_PFN port
	 * makes QEMU stop the ioeventfds and reset the device status.
	 * For modern devices, the queue desc/avail/used addresses in the
	 * PCI BAR are set to 0, and QEMU takes no further action.
	 *
	 * Here we only care about what to deliver to the vhost-user
	 * backend, so just close the ioeventfds for now.
	 */
	struct virtio_user_dev *dev = vdev->ctx;

	close(dev->callfds[vq->vq_queue_index]);
	close(dev->kickfds[vq->vq_queue_index]);
	dev->callfds[vq->vq_queue_index] = -1;
	dev->kickfds[vq->vq_queue_index] = -1;

	spdk_free(vq->vq_ring_virt_mem);
}
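
/*
 * Example (illustrative): the split-ring layout virtio_user_setup_queue()
 * computes for a 128-entry queue with VIRTIO_PCI_VRING_ALIGN = 4096:
 *
 *	desc:	128 * sizeof(struct vring_desc) = 128 * 16 = 2048 B, offset 0
 *	avail:	4 B header + 128 * 2 B ring     = 260 B,   offset 2048
 *	used:	rounded up to the next 4096 B boundary  -> offset 4096
 */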

static void
virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq)
{
	uint64_t buf = 1;
	struct virtio_user_dev *dev = vdev->ctx;

	if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) {
		SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno));
	}
}

static void
virtio_user_destroy(struct virtio_dev *vdev)
{
	struct virtio_user_dev *dev = vdev->ctx;

	close(dev->vhostfd);
	free(dev);
}

static void
virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct virtio_user_dev *dev = vdev->ctx;

	spdk_json_write_named_string(w, "type", "user");
	spdk_json_write_named_string(w, "socket", dev->path);
}

static void
virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct virtio_user_dev *dev = vdev->ctx;

	spdk_json_write_named_string(w, "trtype", "user");
	spdk_json_write_named_string(w, "traddr", dev->path);
	spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num);
	spdk_json_write_named_uint32(w, "vq_size",
				     virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0));
}

static const struct virtio_dev_ops virtio_user_ops = {
	.read_dev_cfg = virtio_user_read_dev_config,
	.write_dev_cfg = virtio_user_write_dev_config,
	.get_status = virtio_user_get_status,
	.set_status = virtio_user_set_status,
	.get_features = virtio_user_get_features,
	.set_features = virtio_user_set_features,
	.destruct_dev = virtio_user_destroy,
	.get_queue_size = virtio_user_get_queue_size,
	.setup_queue = virtio_user_setup_queue,
	.del_queue = virtio_user_del_queue,
	.notify_queue = virtio_user_notify_queue,
	.dump_json_info = virtio_user_dump_json_info,
	.write_json_config = virtio_user_write_json_config,
};

int
virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
		     uint32_t queue_size)
{
	struct virtio_user_dev *dev;
	int rc;

	if (name == NULL) {
		SPDK_ERRLOG("No name given for controller: %s\n", path);
		return -EINVAL;
	}

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -ENOMEM;
	}

	rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to init device: %s\n", path);
		free(dev);
		return rc;
	}

	vdev->is_hw = 0;

	snprintf(dev->path, PATH_MAX, "%s", path);
	dev->queue_size = queue_size;

	rc = virtio_user_dev_setup(vdev);
	if (rc < 0) {
		SPDK_ERRLOG("backend setup failed\n");
		goto err;
	}

	rc = dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("set_owner failed: %s\n", spdk_strerror(-rc));
		goto err;
	}

	return 0;

err:
	virtio_dev_destruct(vdev);
	return rc;
}

SPDK_LOG_REGISTER_COMPONENT(virtio_user)
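
/*
 * Example (illustrative): bringing up a device against a vhost-user
 * socket. The controller name, socket path and queue size below are
 * placeholders:
 *
 *	struct virtio_dev vdev;
 *
 *	if (virtio_user_dev_init(&vdev, "VirtioScsi0", "/var/tmp/vhost.0", 512) == 0) {
 *		feature negotiation and queue setup then proceed
 *		through virtio_user_ops
 *	}
 */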