1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2010-2016 Intel Corporation. All rights reserved. 3 * All rights reserved. 4 */ 5 6 #include "spdk/stdinc.h" 7 8 #include <sys/eventfd.h> 9 10 #include "spdk/string.h" 11 #include "spdk/config.h" 12 #include "spdk/util.h" 13 14 #include "spdk_internal/virtio.h" 15 #include "spdk_internal/vhost_user.h" 16 17 /* The version of the protocol we support */ 18 #define VHOST_USER_VERSION 0x1 19 20 #define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \ 21 ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ 22 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) 23 24 struct virtio_user_dev { 25 int vhostfd; 26 27 int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; 28 int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; 29 uint32_t queue_size; 30 31 uint8_t status; 32 bool is_stopping; 33 char path[PATH_MAX]; 34 uint64_t protocol_features; 35 struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES]; 36 struct spdk_mem_map *mem_map; 37 }; 38 39 static int 40 vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) 41 { 42 int r; 43 struct msghdr msgh; 44 struct iovec iov; 45 size_t fd_size = fd_num * sizeof(int); 46 char control[CMSG_SPACE(fd_size)]; 47 struct cmsghdr *cmsg; 48 49 memset(&msgh, 0, sizeof(msgh)); 50 memset(control, 0, sizeof(control)); 51 52 iov.iov_base = (uint8_t *)buf; 53 iov.iov_len = len; 54 55 msgh.msg_iov = &iov; 56 msgh.msg_iovlen = 1; 57 58 if (fds && fd_num > 0) { 59 msgh.msg_control = control; 60 msgh.msg_controllen = sizeof(control); 61 cmsg = CMSG_FIRSTHDR(&msgh); 62 if (!cmsg) { 63 SPDK_WARNLOG("First HDR is NULL\n"); 64 return -EIO; 65 } 66 cmsg->cmsg_len = CMSG_LEN(fd_size); 67 cmsg->cmsg_level = SOL_SOCKET; 68 cmsg->cmsg_type = SCM_RIGHTS; 69 memcpy(CMSG_DATA(cmsg), fds, fd_size); 70 } else { 71 msgh.msg_control = NULL; 72 msgh.msg_controllen = 0; 73 } 74 75 do { 76 r = sendmsg(fd, &msgh, 0); 77 } while (r < 0 && errno == EINTR); 78 79 if (r == -1) { 80 return -errno; 81 } 82 83 return 0; 84 } 85 86 static int 87 vhost_user_read(int fd, struct vhost_user_msg *msg) 88 { 89 uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; 90 ssize_t ret; 91 size_t sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; 92 93 ret = recv(fd, (void *)msg, sz_hdr, 0); 94 if ((size_t)ret != sz_hdr) { 95 SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n", 96 ret, sz_hdr); 97 if (ret == -1) { 98 return -errno; 99 } else { 100 return -EBUSY; 101 } 102 } 103 104 /* validate msg flags */ 105 if (msg->flags != (valid_flags)) { 106 SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n", 107 msg->flags, valid_flags); 108 return -EIO; 109 } 110 111 sz_payload = msg->size; 112 113 if (sz_payload > VHOST_USER_PAYLOAD_SIZE) { 114 SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n", 115 sz_payload, VHOST_USER_PAYLOAD_SIZE); 116 return -EIO; 117 } 118 119 if (sz_payload) { 120 ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); 121 if ((size_t)ret != sz_payload) { 122 SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n", 123 ret, msg->size); 124 if (ret == -1) { 125 return -errno; 126 } else { 127 return -EBUSY; 128 } 129 } 130 } 131 132 return 0; 133 } 134 135 struct hugepage_file_info { 136 uint64_t addr; /**< virtual addr */ 137 size_t size; /**< the file size */ 138 char path[PATH_MAX]; /**< path to backing file */ 139 }; 140 141 /* Two possible options: 142 * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file 143 * array. This is simple but cannot be used in secondary process because 144 * secondary process will close and munmap that file. 145 * 2. Match HUGEFILE_FMT to find hugepage files directly. 146 * 147 * We choose option 2. 148 */ 149 static int 150 get_hugepage_file_info(struct hugepage_file_info hugepages[], int max) 151 { 152 int idx, rc; 153 FILE *f; 154 char buf[BUFSIZ], *tmp, *tail; 155 char *str_underline, *str_start; 156 int huge_index; 157 uint64_t v_start, v_end; 158 159 f = fopen("/proc/self/maps", "r"); 160 if (!f) { 161 SPDK_ERRLOG("cannot open /proc/self/maps\n"); 162 rc = -errno; 163 assert(rc < 0); /* scan-build hack */ 164 return rc; 165 } 166 167 idx = 0; 168 while (fgets(buf, sizeof(buf), f) != NULL) { 169 if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { 170 SPDK_ERRLOG("Failed to parse address\n"); 171 rc = -EIO; 172 goto out; 173 } 174 175 tmp = strchr(buf, ' ') + 1; /** skip address */ 176 tmp = strchr(tmp, ' ') + 1; /** skip perm */ 177 tmp = strchr(tmp, ' ') + 1; /** skip offset */ 178 tmp = strchr(tmp, ' ') + 1; /** skip dev */ 179 tmp = strchr(tmp, ' ') + 1; /** skip inode */ 180 while (*tmp == ' ') { /** skip spaces */ 181 tmp++; 182 } 183 tail = strrchr(tmp, '\n'); /** remove newline if exists */ 184 if (tail) { 185 *tail = '\0'; 186 } 187 188 /* Match HUGEFILE_FMT, aka "%s/%smap_%d", 189 * which is defined in eal_filesystem.h 190 */ 191 str_underline = strrchr(tmp, '_'); 192 if (!str_underline) { 193 continue; 194 } 195 196 str_start = str_underline - strlen("map"); 197 if (str_start < tmp) { 198 continue; 199 } 200 201 if (sscanf(str_start, "map_%d", &huge_index) != 1) { 202 continue; 203 } 204 205 if (idx >= max) { 206 SPDK_ERRLOG("Exceed maximum of %d\n", max); 207 rc = -ENOSPC; 208 goto out; 209 } 210 211 if (idx > 0 && 212 strncmp(tmp, hugepages[idx - 1].path, PATH_MAX) == 0 && 213 v_start == hugepages[idx - 1].addr + hugepages[idx - 1].size) { 214 hugepages[idx - 1].size += (v_end - v_start); 215 continue; 216 } 217 218 hugepages[idx].addr = v_start; 219 hugepages[idx].size = v_end - v_start; 220 snprintf(hugepages[idx].path, PATH_MAX, "%s", tmp); 221 idx++; 222 } 223 224 rc = idx; 225 out: 226 fclose(f); 227 return rc; 228 } 229 230 static int 231 prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) 232 { 233 int i, num; 234 struct hugepage_file_info hugepages[VHOST_USER_MEMORY_MAX_NREGIONS]; 235 236 num = get_hugepage_file_info(hugepages, VHOST_USER_MEMORY_MAX_NREGIONS); 237 if (num < 0) { 238 SPDK_ERRLOG("Failed to prepare memory for vhost-user\n"); 239 return num; 240 } 241 242 for (i = 0; i < num; ++i) { 243 /* the memory regions are unaligned */ 244 msg->payload.memory.regions[i].guest_phys_addr = hugepages[i].addr; /* use vaddr! */ 245 msg->payload.memory.regions[i].userspace_addr = hugepages[i].addr; 246 msg->payload.memory.regions[i].memory_size = hugepages[i].size; 247 msg->payload.memory.regions[i].flags_padding = 0; 248 fds[i] = open(hugepages[i].path, O_RDWR); 249 } 250 251 msg->payload.memory.nregions = num; 252 msg->payload.memory.padding = 0; 253 254 return 0; 255 } 256 257 static const char *const vhost_msg_strings[VHOST_USER_MAX] = { 258 [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", 259 [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", 260 [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", 261 [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", 262 [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", 263 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", 264 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", 265 [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", 266 [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", 267 [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", 268 [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", 269 [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", 270 [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", 271 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", 272 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", 273 [VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG", 274 [VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG", 275 }; 276 277 static int 278 vhost_user_sock(struct virtio_user_dev *dev, 279 enum vhost_user_request req, 280 void *arg) 281 { 282 struct vhost_user_msg msg; 283 struct vhost_vring_file *file = 0; 284 int need_reply = 0; 285 int fds[VHOST_USER_MEMORY_MAX_NREGIONS]; 286 int fd_num = 0; 287 int i, len, rc; 288 int vhostfd = dev->vhostfd; 289 290 SPDK_DEBUGLOG(virtio_user, "sent message %d = %s\n", req, vhost_msg_strings[req]); 291 292 msg.request = req; 293 msg.flags = VHOST_USER_VERSION; 294 msg.size = 0; 295 296 switch (req) { 297 case VHOST_USER_GET_FEATURES: 298 case VHOST_USER_GET_PROTOCOL_FEATURES: 299 case VHOST_USER_GET_QUEUE_NUM: 300 need_reply = 1; 301 break; 302 303 case VHOST_USER_SET_FEATURES: 304 case VHOST_USER_SET_LOG_BASE: 305 case VHOST_USER_SET_PROTOCOL_FEATURES: 306 msg.payload.u64 = *((__u64 *)arg); 307 msg.size = sizeof(msg.payload.u64); 308 break; 309 310 case VHOST_USER_SET_OWNER: 311 case VHOST_USER_RESET_OWNER: 312 break; 313 314 case VHOST_USER_SET_MEM_TABLE: 315 rc = prepare_vhost_memory_user(&msg, fds); 316 if (rc < 0) { 317 return rc; 318 } 319 fd_num = msg.payload.memory.nregions; 320 msg.size = sizeof(msg.payload.memory.nregions); 321 msg.size += sizeof(msg.payload.memory.padding); 322 msg.size += fd_num * sizeof(struct vhost_memory_region); 323 break; 324 325 case VHOST_USER_SET_LOG_FD: 326 fds[fd_num++] = *((int *)arg); 327 break; 328 329 case VHOST_USER_SET_VRING_NUM: 330 case VHOST_USER_SET_VRING_BASE: 331 case VHOST_USER_SET_VRING_ENABLE: 332 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 333 msg.size = sizeof(msg.payload.state); 334 break; 335 336 case VHOST_USER_GET_VRING_BASE: 337 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 338 msg.size = sizeof(msg.payload.state); 339 need_reply = 1; 340 break; 341 342 case VHOST_USER_SET_VRING_ADDR: 343 memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); 344 msg.size = sizeof(msg.payload.addr); 345 break; 346 347 case VHOST_USER_SET_VRING_KICK: 348 case VHOST_USER_SET_VRING_CALL: 349 case VHOST_USER_SET_VRING_ERR: 350 file = arg; 351 msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 352 msg.size = sizeof(msg.payload.u64); 353 if (file->fd > 0) { 354 fds[fd_num++] = file->fd; 355 } else { 356 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 357 } 358 break; 359 360 case VHOST_USER_GET_CONFIG: 361 memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); 362 msg.size = sizeof(msg.payload.cfg); 363 need_reply = 1; 364 break; 365 366 case VHOST_USER_SET_CONFIG: 367 memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); 368 msg.size = sizeof(msg.payload.cfg); 369 break; 370 371 default: 372 SPDK_ERRLOG("trying to send unknown msg\n"); 373 return -EINVAL; 374 } 375 376 len = VHOST_USER_HDR_SIZE + msg.size; 377 rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num); 378 if (rc < 0) { 379 SPDK_ERRLOG("%s failed: %s\n", 380 vhost_msg_strings[req], spdk_strerror(-rc)); 381 return rc; 382 } 383 384 if (req == VHOST_USER_SET_MEM_TABLE) 385 for (i = 0; i < fd_num; ++i) { 386 close(fds[i]); 387 } 388 389 if (need_reply) { 390 rc = vhost_user_read(vhostfd, &msg); 391 if (rc < 0) { 392 SPDK_WARNLOG("Received msg failed: %s\n", spdk_strerror(-rc)); 393 return rc; 394 } 395 396 if (req != msg.request) { 397 SPDK_WARNLOG("Received unexpected msg type\n"); 398 return -EIO; 399 } 400 401 switch (req) { 402 case VHOST_USER_GET_FEATURES: 403 case VHOST_USER_GET_PROTOCOL_FEATURES: 404 case VHOST_USER_GET_QUEUE_NUM: 405 if (msg.size != sizeof(msg.payload.u64)) { 406 SPDK_WARNLOG("Received bad msg size\n"); 407 return -EIO; 408 } 409 *((__u64 *)arg) = msg.payload.u64; 410 break; 411 case VHOST_USER_GET_VRING_BASE: 412 if (msg.size != sizeof(msg.payload.state)) { 413 SPDK_WARNLOG("Received bad msg size\n"); 414 return -EIO; 415 } 416 memcpy(arg, &msg.payload.state, 417 sizeof(struct vhost_vring_state)); 418 break; 419 case VHOST_USER_GET_CONFIG: 420 if (msg.size != sizeof(msg.payload.cfg)) { 421 SPDK_WARNLOG("Received bad msg size\n"); 422 return -EIO; 423 } 424 memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg)); 425 break; 426 default: 427 SPDK_WARNLOG("Received unexpected msg type\n"); 428 return -EBADMSG; 429 } 430 } 431 432 return 0; 433 } 434 435 /** 436 * Set up environment to talk with a vhost user backend. 437 * 438 * @return 439 * - (-1) if fail; 440 * - (0) if succeed. 441 */ 442 static int 443 vhost_user_setup(struct virtio_user_dev *dev) 444 { 445 int fd; 446 int flag; 447 struct sockaddr_un un; 448 ssize_t rc; 449 450 fd = socket(AF_UNIX, SOCK_STREAM, 0); 451 if (fd < 0) { 452 SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno)); 453 return -errno; 454 } 455 456 flag = fcntl(fd, F_GETFD); 457 if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) { 458 SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno)); 459 } 460 461 memset(&un, 0, sizeof(un)); 462 un.sun_family = AF_UNIX; 463 rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); 464 if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) { 465 SPDK_ERRLOG("socket path too long\n"); 466 close(fd); 467 if (rc < 0) { 468 return -errno; 469 } else { 470 return -EINVAL; 471 } 472 } 473 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { 474 SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno)); 475 close(fd); 476 return -errno; 477 } 478 479 dev->vhostfd = fd; 480 return 0; 481 } 482 483 static int 484 virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel) 485 { 486 struct virtio_user_dev *dev = vdev->ctx; 487 488 /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come 489 * firstly because vhost depends on this msg to allocate virtqueue 490 * pair. 491 */ 492 struct vhost_vring_file file; 493 494 file.index = queue_sel; 495 file.fd = dev->callfds[queue_sel]; 496 return vhost_user_sock(dev, VHOST_USER_SET_VRING_CALL, &file); 497 } 498 499 static int 500 virtio_user_set_vring_addr(struct virtio_dev *vdev, uint32_t queue_sel) 501 { 502 struct virtio_user_dev *dev = vdev->ctx; 503 struct vring *vring = &dev->vrings[queue_sel]; 504 struct vhost_vring_addr addr = { 505 .index = queue_sel, 506 .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, 507 .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, 508 .used_user_addr = (uint64_t)(uintptr_t)vring->used, 509 .log_guest_addr = 0, 510 .flags = 0, /* disable log */ 511 }; 512 513 return vhost_user_sock(dev, VHOST_USER_SET_VRING_ADDR, &addr); 514 } 515 516 static int 517 virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel) 518 { 519 struct virtio_user_dev *dev = vdev->ctx; 520 struct vhost_vring_file file; 521 struct vhost_vring_state state; 522 struct vring *vring = &dev->vrings[queue_sel]; 523 int rc; 524 525 state.index = queue_sel; 526 state.num = vring->num; 527 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_NUM, &state); 528 if (rc < 0) { 529 return rc; 530 } 531 532 state.index = queue_sel; 533 state.num = 0; /* no reservation */ 534 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_BASE, &state); 535 if (rc < 0) { 536 return rc; 537 } 538 539 virtio_user_set_vring_addr(vdev, queue_sel); 540 541 /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes 542 * lastly because vhost depends on this msg to judge if 543 * virtio is ready. 544 */ 545 file.index = queue_sel; 546 file.fd = dev->kickfds[queue_sel]; 547 return vhost_user_sock(dev, VHOST_USER_SET_VRING_KICK, &file); 548 } 549 550 static int 551 virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel) 552 { 553 struct virtio_user_dev *dev = vdev->ctx; 554 struct vhost_vring_state state; 555 556 state.index = queue_sel; 557 state.num = 0; 558 559 return vhost_user_sock(dev, VHOST_USER_GET_VRING_BASE, &state); 560 } 561 562 static int 563 virtio_user_queue_setup(struct virtio_dev *vdev, 564 int (*fn)(struct virtio_dev *, uint32_t)) 565 { 566 uint32_t i; 567 int rc; 568 569 for (i = 0; i < vdev->max_queues; ++i) { 570 rc = fn(vdev, i); 571 if (rc < 0) { 572 SPDK_ERRLOG("setup tx vq fails: %"PRIu32".\n", i); 573 return rc; 574 } 575 } 576 577 return 0; 578 } 579 580 static int 581 virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map, 582 enum spdk_mem_map_notify_action action, 583 void *vaddr, size_t size) 584 { 585 struct virtio_dev *vdev = cb_ctx; 586 struct virtio_user_dev *dev = vdev->ctx; 587 uint64_t features; 588 int ret; 589 590 /* We do not support dynamic memory allocation with virtio-user. If this is the 591 * initial notification when the device is started, dev->mem_map will be NULL. If 592 * this is the final notification when the device is stopped, dev->is_stopping will 593 * be true. All other cases are unsupported. 594 */ 595 if (dev->mem_map != NULL && !dev->is_stopping) { 596 assert(false); 597 SPDK_ERRLOG("Memory map change with active virtio_user_devs not allowed.\n"); 598 SPDK_ERRLOG("Pre-allocate memory for application using -s (mem_size) option.\n"); 599 return -1; 600 } 601 602 /* We have to resend all mappings anyway, so don't bother with any 603 * page tracking. 604 */ 605 ret = vhost_user_sock(dev, VHOST_USER_SET_MEM_TABLE, NULL); 606 if (ret < 0) { 607 return ret; 608 } 609 610 /* Since we might want to use that mapping straight away, we have to 611 * make sure the guest has already processed our SET_MEM_TABLE message. 612 * F_REPLY_ACK is just a feature and the host is not obliged to 613 * support it, so we send a simple message that always has a response 614 * and we wait for that response. Messages are always processed in order. 615 */ 616 return vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features); 617 } 618 619 static int 620 virtio_user_register_mem(struct virtio_dev *vdev) 621 { 622 struct virtio_user_dev *dev = vdev->ctx; 623 const struct spdk_mem_map_ops virtio_user_map_ops = { 624 .notify_cb = virtio_user_map_notify, 625 .are_contiguous = NULL 626 }; 627 628 dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev); 629 if (dev->mem_map == NULL) { 630 SPDK_ERRLOG("spdk_mem_map_alloc() failed\n"); 631 return -1; 632 } 633 634 return 0; 635 } 636 637 static void 638 virtio_user_unregister_mem(struct virtio_dev *vdev) 639 { 640 struct virtio_user_dev *dev = vdev->ctx; 641 642 dev->is_stopping = true; 643 spdk_mem_map_free(&dev->mem_map); 644 } 645 646 static int 647 virtio_user_start_device(struct virtio_dev *vdev) 648 { 649 struct virtio_user_dev *dev = vdev->ctx; 650 uint64_t host_max_queues; 651 int ret; 652 653 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 && 654 vdev->max_queues > 1 + vdev->fixed_queues_num) { 655 SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the " 656 "host doesn't support VHOST_USER_PROTOCOL_F_MQ. " 657 "Only one request queue will be used.\n", 658 vdev->name, vdev->max_queues - vdev->fixed_queues_num); 659 vdev->max_queues = 1 + vdev->fixed_queues_num; 660 } 661 662 /* negotiate the number of I/O queues. */ 663 ret = vhost_user_sock(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues); 664 if (ret < 0) { 665 return ret; 666 } 667 668 if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) { 669 SPDK_WARNLOG("%s: requested %"PRIu16" request queues" 670 "but only %"PRIu64" available\n", 671 vdev->name, vdev->max_queues - vdev->fixed_queues_num, 672 host_max_queues); 673 vdev->max_queues = host_max_queues; 674 } 675 676 /* tell vhost to create queues */ 677 ret = virtio_user_queue_setup(vdev, virtio_user_create_queue); 678 if (ret < 0) { 679 return ret; 680 } 681 682 ret = virtio_user_register_mem(vdev); 683 if (ret < 0) { 684 return ret; 685 } 686 687 return virtio_user_queue_setup(vdev, virtio_user_kick_queue); 688 } 689 690 static int 691 virtio_user_stop_device(struct virtio_dev *vdev) 692 { 693 int ret; 694 695 ret = virtio_user_queue_setup(vdev, virtio_user_stop_queue); 696 /* a queue might fail to stop for various reasons, e.g. socket 697 * connection going down, but this mustn't prevent us from freeing 698 * the mem map. 699 */ 700 virtio_user_unregister_mem(vdev); 701 return ret; 702 } 703 704 static int 705 virtio_user_dev_setup(struct virtio_dev *vdev) 706 { 707 struct virtio_user_dev *dev = vdev->ctx; 708 uint16_t i; 709 710 dev->vhostfd = -1; 711 712 for (i = 0; i < SPDK_VIRTIO_MAX_VIRTQUEUES; ++i) { 713 dev->callfds[i] = -1; 714 dev->kickfds[i] = -1; 715 } 716 717 return vhost_user_setup(dev); 718 } 719 720 static int 721 virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset, 722 void *dst, int length) 723 { 724 struct virtio_user_dev *dev = vdev->ctx; 725 struct vhost_user_config cfg = {0}; 726 int rc; 727 728 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { 729 return -ENOTSUP; 730 } 731 732 cfg.offset = 0; 733 cfg.size = VHOST_USER_MAX_CONFIG_SIZE; 734 735 rc = vhost_user_sock(dev, VHOST_USER_GET_CONFIG, &cfg); 736 if (rc < 0) { 737 SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc)); 738 return rc; 739 } 740 741 memcpy(dst, cfg.region + offset, length); 742 return 0; 743 } 744 745 static int 746 virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset, 747 const void *src, int length) 748 { 749 struct virtio_user_dev *dev = vdev->ctx; 750 struct vhost_user_config cfg = {0}; 751 int rc; 752 753 if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { 754 return -ENOTSUP; 755 } 756 757 cfg.offset = offset; 758 cfg.size = length; 759 memcpy(cfg.region, src, length); 760 761 rc = vhost_user_sock(dev, VHOST_USER_SET_CONFIG, &cfg); 762 if (rc < 0) { 763 SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc)); 764 return rc; 765 } 766 767 return 0; 768 } 769 770 static void 771 virtio_user_set_status(struct virtio_dev *vdev, uint8_t status) 772 { 773 struct virtio_user_dev *dev = vdev->ctx; 774 int rc = 0; 775 776 if ((dev->status & VIRTIO_CONFIG_S_NEEDS_RESET) && 777 status != VIRTIO_CONFIG_S_RESET) { 778 rc = -1; 779 } else if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 780 rc = virtio_user_start_device(vdev); 781 } else if (status == VIRTIO_CONFIG_S_RESET && 782 (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 783 rc = virtio_user_stop_device(vdev); 784 } 785 786 if (rc != 0) { 787 dev->status |= VIRTIO_CONFIG_S_NEEDS_RESET; 788 } else { 789 dev->status = status; 790 } 791 } 792 793 static uint8_t 794 virtio_user_get_status(struct virtio_dev *vdev) 795 { 796 struct virtio_user_dev *dev = vdev->ctx; 797 798 return dev->status; 799 } 800 801 static uint64_t 802 virtio_user_get_features(struct virtio_dev *vdev) 803 { 804 struct virtio_user_dev *dev = vdev->ctx; 805 uint64_t features; 806 int rc; 807 808 rc = vhost_user_sock(dev, VHOST_USER_GET_FEATURES, &features); 809 if (rc < 0) { 810 SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc)); 811 return 0; 812 } 813 814 return features; 815 } 816 817 static int 818 virtio_user_set_features(struct virtio_dev *vdev, uint64_t features) 819 { 820 struct virtio_user_dev *dev = vdev->ctx; 821 uint64_t protocol_features; 822 int ret; 823 824 ret = vhost_user_sock(dev, VHOST_USER_SET_FEATURES, &features); 825 if (ret < 0) { 826 return ret; 827 } 828 829 vdev->negotiated_features = features; 830 vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1); 831 832 if (!virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { 833 /* nothing else to do */ 834 return 0; 835 } 836 837 ret = vhost_user_sock(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); 838 if (ret < 0) { 839 return ret; 840 } 841 842 protocol_features &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES; 843 ret = vhost_user_sock(dev, VHOST_USER_SET_PROTOCOL_FEATURES, &protocol_features); 844 if (ret < 0) { 845 return ret; 846 } 847 848 dev->protocol_features = protocol_features; 849 return 0; 850 } 851 852 static uint16_t 853 virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id) 854 { 855 struct virtio_user_dev *dev = vdev->ctx; 856 857 /* Currently each queue has same queue size */ 858 return dev->queue_size; 859 } 860 861 static int 862 virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq) 863 { 864 struct virtio_user_dev *dev = vdev->ctx; 865 struct vhost_vring_state state; 866 uint16_t queue_idx = vq->vq_queue_index; 867 void *queue_mem; 868 uint64_t desc_addr, avail_addr, used_addr; 869 int callfd, kickfd, rc; 870 871 if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) { 872 SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx); 873 return -EEXIST; 874 } 875 876 /* May use invalid flag, but some backend uses kickfd and 877 * callfd as criteria to judge if dev is alive. so finally we 878 * use real event_fd. 879 */ 880 callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); 881 if (callfd < 0) { 882 SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno)); 883 return -errno; 884 } 885 886 kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); 887 if (kickfd < 0) { 888 SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno)); 889 close(callfd); 890 return -errno; 891 } 892 893 queue_mem = spdk_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL, 894 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 895 if (queue_mem == NULL) { 896 close(kickfd); 897 close(callfd); 898 return -ENOMEM; 899 } 900 901 vq->vq_ring_mem = SPDK_VTOPHYS_ERROR; 902 vq->vq_ring_virt_mem = queue_mem; 903 904 state.index = vq->vq_queue_index; 905 state.num = vq->vq_nentries; 906 907 if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { 908 rc = vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state); 909 if (rc < 0) { 910 SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n", 911 spdk_strerror(-rc)); 912 close(kickfd); 913 close(callfd); 914 spdk_free(queue_mem); 915 return -rc; 916 } 917 } 918 919 dev->callfds[queue_idx] = callfd; 920 dev->kickfds[queue_idx] = kickfd; 921 922 desc_addr = (uintptr_t)vq->vq_ring_virt_mem; 923 avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); 924 used_addr = SPDK_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, 925 ring[vq->vq_nentries]), 926 VIRTIO_PCI_VRING_ALIGN); 927 928 dev->vrings[queue_idx].num = vq->vq_nentries; 929 dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; 930 dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; 931 dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; 932 933 return 0; 934 } 935 936 static void 937 virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq) 938 { 939 /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU 940 * correspondingly stops the ioeventfds, and reset the status of 941 * the device. 942 * For modern devices, set queue desc, avail, used in PCI bar to 0, 943 * not see any more behavior in QEMU. 944 * 945 * Here we just care about what information to deliver to vhost-user. 946 * So we just close ioeventfd for now. 947 */ 948 struct virtio_user_dev *dev = vdev->ctx; 949 950 close(dev->callfds[vq->vq_queue_index]); 951 close(dev->kickfds[vq->vq_queue_index]); 952 dev->callfds[vq->vq_queue_index] = -1; 953 dev->kickfds[vq->vq_queue_index] = -1; 954 955 spdk_free(vq->vq_ring_virt_mem); 956 } 957 958 static void 959 virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq) 960 { 961 uint64_t buf = 1; 962 struct virtio_user_dev *dev = vdev->ctx; 963 964 if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) { 965 SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno)); 966 } 967 } 968 969 static void 970 virtio_user_destroy(struct virtio_dev *vdev) 971 { 972 struct virtio_user_dev *dev = vdev->ctx; 973 974 if (dev) { 975 close(dev->vhostfd); 976 free(dev); 977 } 978 } 979 980 static void 981 virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) 982 { 983 struct virtio_user_dev *dev = vdev->ctx; 984 985 spdk_json_write_named_string(w, "type", "user"); 986 spdk_json_write_named_string(w, "socket", dev->path); 987 } 988 989 static void 990 virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) 991 { 992 struct virtio_user_dev *dev = vdev->ctx; 993 994 spdk_json_write_named_string(w, "trtype", "user"); 995 spdk_json_write_named_string(w, "traddr", dev->path); 996 spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num); 997 spdk_json_write_named_uint32(w, "vq_size", virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0)); 998 } 999 1000 static const struct virtio_dev_ops virtio_user_ops = { 1001 .read_dev_cfg = virtio_user_read_dev_config, 1002 .write_dev_cfg = virtio_user_write_dev_config, 1003 .get_status = virtio_user_get_status, 1004 .set_status = virtio_user_set_status, 1005 .get_features = virtio_user_get_features, 1006 .set_features = virtio_user_set_features, 1007 .destruct_dev = virtio_user_destroy, 1008 .get_queue_size = virtio_user_get_queue_size, 1009 .setup_queue = virtio_user_setup_queue, 1010 .del_queue = virtio_user_del_queue, 1011 .notify_queue = virtio_user_notify_queue, 1012 .dump_json_info = virtio_user_dump_json_info, 1013 .write_json_config = virtio_user_write_json_config, 1014 }; 1015 1016 int 1017 virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path, 1018 uint32_t queue_size) 1019 { 1020 struct virtio_user_dev *dev; 1021 int rc; 1022 1023 if (name == NULL) { 1024 SPDK_ERRLOG("No name gived for controller: %s\n", path); 1025 return -EINVAL; 1026 } 1027 1028 dev = calloc(1, sizeof(*dev)); 1029 if (dev == NULL) { 1030 return -ENOMEM; 1031 } 1032 1033 rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev); 1034 if (rc != 0) { 1035 SPDK_ERRLOG("Failed to init device: %s\n", path); 1036 free(dev); 1037 return rc; 1038 } 1039 1040 vdev->is_hw = 0; 1041 1042 snprintf(dev->path, PATH_MAX, "%s", path); 1043 dev->queue_size = queue_size; 1044 1045 rc = virtio_user_dev_setup(vdev); 1046 if (rc < 0) { 1047 SPDK_ERRLOG("backend set up fails\n"); 1048 goto err; 1049 } 1050 1051 rc = vhost_user_sock(dev, VHOST_USER_SET_OWNER, NULL); 1052 if (rc < 0) { 1053 SPDK_ERRLOG("set_owner fails: %s\n", spdk_strerror(-rc)); 1054 goto err; 1055 } 1056 1057 return 0; 1058 1059 err: 1060 virtio_dev_destruct(vdev); 1061 return rc; 1062 } 1063 SPDK_LOG_REGISTER_COMPONENT(virtio_user) 1064