/*-
 *   BSD LICENSE
 *
 *   Copyright(c) Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"

#include "spdk_internal/memory.h"

static uint32_t *g_num_ctrlrs;

/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";

struct spdk_vhost_session_fn_ctx {
	/** Device pointer obtained before enqueuing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the session to send event to. */
	uint32_t vsession_id;

	/** User callback function to be executed on given lcore. */
	spdk_vhost_session_fn cb_fn;

	/** Semaphore used to signal that event is done. */
	sem_t sem;

	/** Response to be written by enqueued event. */
	int response;
};
static int new_connection(int vid);
static int start_device(int vid);
static void stop_device(int vid);
static void destroy_connection(int vid);
static int get_config(int vid, uint8_t *config, uint32_t len);
static int set_config(int vid, uint8_t *config, uint32_t offset,
		      uint32_t size, uint32_t flags);

const struct vhost_device_ops g_spdk_vhost_ops = {
	.new_device = start_device,
	.destroy_device = stop_device,
	.get_config = get_config,
	.set_config = set_config,
	.new_connection = new_connection,
	.destroy_connection = destroy_connection,
	.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
	.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
	.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
	.vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr,
};

static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
			g_spdk_vhost_devices);
static pthread_mutex_t g_spdk_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;

void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
{
	void *vva;
	uint64_t newlen;

	newlen = len;
	vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
	if (newlen != len) {
		return NULL;
	}

	return vva;
}

static void
spdk_vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
			uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = spdk_vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (spdk_vhost_vring_desc_is_wr(desc)) {
			/* To be honest, only the pages really touched should be logged, but
			 * doing so would require tracking those changes in each backend.
			 * Also, the backend will most likely touch all/most of those pages, so
			 * let's assume we touched all pages passed to us as writable buffers. */
			rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
		}
		spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}

static void
spdk_vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
			       struct spdk_vhost_virtqueue *virtqueue,
			       uint16_t idx)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, ring[idx]);
	len = sizeof(virtqueue->vring.used->ring[idx]);
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}

static void
spdk_vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
			      struct spdk_vhost_virtqueue *virtqueue)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, idx);
	len = sizeof(virtqueue->vring.used->idx);
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}
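/*
 * Note: the three logging helpers above exist for live migration. When the
 * VHOST_F_LOG_ALL feature is negotiated, every write into guest memory
 * (descriptor payloads, used ring entries, used->idx) must be reported via
 * the rte_vhost log API so the dirtied pages can be re-sent to the target.
 */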
/*
 * Get available requests from the avail ring.
 */
uint16_t
spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
			     uint16_t reqs_len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_avail *avail = vring->avail;
	uint16_t size_mask = vring->size - 1;
	uint16_t last_idx = vring->last_avail_idx, avail_idx = avail->idx;
	uint16_t count, i;

	count = avail_idx - last_idx;
	if (spdk_likely(count == 0)) {
		return 0;
	}

	if (spdk_unlikely(count > vring->size)) {
		/* TODO: the queue is unrecoverably broken and should be marked so.
		 * For now we will fail silently and report there are no new avail entries.
		 */
		return 0;
	}

	count = spdk_min(count, reqs_len);
	vring->last_avail_idx += count;
	for (i = 0; i < count; i++) {
		reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      last_idx, avail_idx, count);

	return count;
}
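/*
 * A worked example of the index arithmetic above: avail->idx and
 * last_avail_idx are free-running 16-bit counters, so the unsigned
 * subtraction "avail_idx - last_idx" yields the number of new entries even
 * across a wrap, e.g. last_idx = 65534 and avail_idx = 2 gives count = 4.
 * Each entry's ring slot is then taken modulo the power-of-two ring size
 * via size_mask.
 */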
static bool
spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}

int
spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		       uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
		       uint32_t *desc_table_size)
{
	if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
		return -1;
	}

	*desc = &virtqueue->vring.desc[req_idx];

	if (spdk_vhost_vring_desc_is_indirect(*desc)) {
		assert(spdk_vhost_dev_has_feature(vsession, VIRTIO_RING_F_INDIRECT_DESC));
		*desc_table_size = (*desc)->len / sizeof(**desc);
		*desc_table = spdk_vhost_gpa_to_vva(vsession, (*desc)->addr,
						    sizeof(**desc) * *desc_table_size);
		*desc = *desc_table;
		if (*desc == NULL) {
			return -1;
		}

		return 0;
	}

	*desc_table = virtqueue->vring.desc;
	*desc_table_size = virtqueue->vring.size;

	return 0;
}

int
spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession,
			  struct spdk_vhost_virtqueue *virtqueue)
{
	if (virtqueue->used_req_cnt == 0) {
		return 0;
	}

	virtqueue->req_cnt += virtqueue->used_req_cnt;
	virtqueue->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      virtqueue - vsession->virtqueue, virtqueue->vring.last_used_idx);

	eventfd_write(virtqueue->vring.callfd, (eventfd_t)1);
	return 1;
}

static void
check_session_io_stats(struct spdk_vhost_session *vsession, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
	uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
	int32_t irq_delay;
	uint32_t req_cnt;
	uint16_t q_idx;

	if (now < vsession->next_stats_check_time) {
		return;
	}

	vsession->next_stats_check_time = now + vsession->stats_check_interval;
	for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
		virtqueue = &vsession->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}

void
spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vsession->coalescing_delay_time_base == 0) {
		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			if (virtqueue->vring.desc == NULL ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			spdk_vhost_vq_used_signal(vsession, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		check_session_io_stats(vsession, now);

		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			/* No need for an event right now */
			if (now < virtqueue->next_event_time ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			if (!spdk_vhost_vq_used_signal(vsession, virtqueue)) {
				continue;
			}

			/* The syscall is quite long, so update the time */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}

static int
spdk_vhost_session_set_coalescing(struct spdk_vhost_dev *vdev,
				  struct spdk_vhost_session *vsession, void *ctx)
{
	if (vdev == NULL || vsession == NULL) {
		/* nothing to do */
		return 0;
	}

	vsession->coalescing_delay_time_base =
		vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
	vsession->coalescing_io_rate_threshold =
		vdev->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
	return 0;
}

int
spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			  uint32_t iops_threshold)
{
	uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
	uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;

	if (delay_time_base >= UINT32_MAX) {
		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
		return -EINVAL;
	} else if (io_rate == 0) {
		SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
			    1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
		return -EINVAL;
	}

	vdev->coalescing_delay_us = delay_base_us;
	vdev->coalescing_iops_threshold = iops_threshold;

	spdk_vhost_dev_foreach_session(vdev, spdk_vhost_session_set_coalescing, NULL);
	return 0;
}

void
spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
			  uint32_t *iops_threshold)
{
	if (delay_base_us) {
		*delay_base_us = vdev->coalescing_delay_us;
	}

	if (iops_threshold) {
		*iops_threshold = vdev->coalescing_iops_threshold;
	}
}
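/*
 * Worked example for the coalescing math above, assuming the stats check
 * interval SPDK_VHOST_STATS_CHECK_INTERVAL_MS is 10 (ms): with
 * delay_base_us = 100 and iops_threshold = 60000, the per-interval
 * threshold becomes 60000 * 10 / 1000 = 600 requests. If 1200 requests
 * complete within one interval, check_session_io_stats() sets the next
 * interrupt delay to 100 us * (1200 - 600) / 600 = 100 us (kept in ticks).
 */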
/*
 * Enqueue id and len to used ring.
 */
void
spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
				struct spdk_vhost_virtqueue *virtqueue,
				uint16_t id, uint32_t len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_used *used = vring->used;
	uint16_t last_idx = vring->last_used_idx & (vring->size - 1);

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vsession->virtqueue, vring->last_used_idx, id, len);

	spdk_vhost_log_req_desc(vsession, virtqueue, id);

	vring->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;

	/* Ensure the used ring is updated before we log it or increment used->idx. */
	spdk_smp_wmb();

	spdk_vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
	*(volatile uint16_t *)&used->idx = vring->last_used_idx;
	spdk_vhost_log_used_vring_idx(vsession, virtqueue);

	/* Ensure all our used ring changes are visible to the guest at the time
	 * of interrupt.
	 * TODO: this is currently an sfence on x86. For other architectures we
	 * will most likely need an smp_mb(), but smp_mb() is an overkill for x86.
	 */
	spdk_wmb();

	virtqueue->used_req_cnt++;
}

int
spdk_vhost_vring_desc_get_next(struct vring_desc **desc,
			       struct vring_desc *desc_table, uint32_t desc_table_size)
{
	struct vring_desc *old_desc = *desc;
	uint16_t next_idx;

	if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
		*desc = NULL;
		return 0;
	}

	next_idx = old_desc->next;
	if (spdk_unlikely(next_idx >= desc_table_size)) {
		*desc = NULL;
		return -1;
	}

	*desc = &desc_table[next_idx];
	return 0;
}

bool
spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}
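/*
 * Translate a single descriptor's guest-physical buffer into host iovecs.
 * One descriptor may map to several discontiguous host regions, so it can
 * produce multiple iovecs, bounded by SPDK_VHOST_IOVS_MAX.
 */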
int
spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
			     uint16_t *iov_index, const struct vring_desc *desc)
{
	uint32_t remaining = desc->len;
	uint32_t to_boundary;
	uint32_t len;
	uintptr_t payload = desc->addr;
	uintptr_t vva;

	do {
		if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
			SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
			return -1;
		}
		vva = (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload);
		if (vva == 0) {
			SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
			return -1;
		}
		to_boundary = VALUE_2MB - _2MB_OFFSET(payload);
		if (spdk_likely(remaining <= to_boundary)) {
			len = remaining;
		} else {
			/*
			 * The descriptor crosses a 2MB hugepage boundary. vhost memory regions
			 * are allocated from hugepage memory, so this descriptor may be described
			 * by discontiguous vhost memory regions. Do not blindly split on the 2MB
			 * boundary; only split if the two sides of the boundary do not map to the
			 * same vhost memory region. This helps ensure we do not exceed the max
			 * number of IOVs defined by SPDK_VHOST_IOVS_MAX.
			 */
			len = to_boundary;
			while (len < remaining) {
				if (vva + len != (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload + len)) {
					break;
				}
				len += spdk_min(remaining - len, VALUE_2MB);
			}
		}
		iov[*iov_index].iov_base = (void *)vva;
		iov[*iov_index].iov_len = len;
		remaining -= len;
		payload += len;
		(*iov_index)++;
	} while (remaining);

	return 0;
}

static struct spdk_vhost_session *
spdk_vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
{
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->id == id) {
			return vsession;
		}
	}

	return NULL;
}

static struct spdk_vhost_session *
spdk_vhost_session_find_by_vid(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
		TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
			if (vsession->vid == vid) {
				return vsession;
			}
		}
	}

	return NULL;
}

#define SHIFT_2MB	21
#define SIZE_2MB	(1ULL << SHIFT_2MB)
/* Parenthesize the full expressions so the macros stay safe in any context. */
#define FLOOR_2MB(x)	((((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB)
#define CEIL_2MB(x)	(((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB)

static void
spdk_vhost_session_mem_register(struct spdk_vhost_session *vsession)
{
	struct rte_vhost_mem_region *region;
	uint32_t i;

	for (i = 0; i < vsession->mem->nregions; i++) {
		uint64_t start, end, len;
		region = &vsession->mem->regions[i];
		start = FLOOR_2MB(region->mmap_addr);
		end = CEIL_2MB(region->mmap_addr + region->mmap_size);
		len = end - start;
		SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
			     start, len);

		if (spdk_mem_register((void *)start, len) != 0) {
			SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
				     i);
			continue;
		}
	}
}
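/*
 * Mirror of spdk_vhost_session_mem_register(). The spdk_vtophys() probe
 * below detects regions that never got registered (e.g. registration
 * failed), so they can be skipped instead of tripping an unregister error.
 */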
static void
spdk_vhost_session_mem_unregister(struct spdk_vhost_session *vsession)
{
	struct rte_vhost_mem_region *region;
	uint32_t i;

	for (i = 0; i < vsession->mem->nregions; i++) {
		uint64_t start, end, len;
		region = &vsession->mem->regions[i];
		start = FLOOR_2MB(region->mmap_addr);
		end = CEIL_2MB(region->mmap_addr + region->mmap_size);
		len = end - start;

		if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
			continue; /* region has not been registered */
		}

		if (spdk_mem_unregister((void *)start, len) != 0) {
			assert(false);
		}
	}
}

void
spdk_vhost_free_reactor(uint32_t lcore)
{
	g_num_ctrlrs[lcore]--;
}

struct spdk_vhost_dev *
spdk_vhost_dev_next(struct spdk_vhost_dev *vdev)
{
	if (vdev == NULL) {
		return TAILQ_FIRST(&g_spdk_vhost_devices);
	}

	return TAILQ_NEXT(vdev, tailq);
}

struct spdk_vhost_dev *
spdk_vhost_dev_find(const char *ctrlr_name)
{
	struct spdk_vhost_dev *vdev;
	size_t dev_dirname_len = strlen(dev_dirname);

	if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) {
		ctrlr_name += dev_dirname_len;
	}

	TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
		if (strcmp(vdev->name, ctrlr_name) == 0) {
			return vdev;
		}
	}

	return NULL;
}

static int
spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc;

	if (cpumask == NULL) {
		return -1;
	}

	if (mask == NULL) {
		spdk_cpuset_copy(cpumask, spdk_app_get_core_mask());
		return 0;
	}

	rc = spdk_app_parse_core_mask(mask, cpumask);
	if (rc < 0) {
		SPDK_ERRLOG("invalid cpumask %s\n", mask);
		return -1;
	}

	if (spdk_cpuset_count(cpumask) == 0) {
		SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n",
			    spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -1;
	}

	return 0;
}

static void *
_start_rte_driver(void *arg)
{
	char *path = arg;

	if (rte_vhost_driver_start(path) != 0) {
		return NULL;
	}

	return path;
}
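/*
 * Register a new vhost controller: validate the name and cpumask, create a
 * vhost-user domain socket at dev_dirname + name, wire up g_spdk_vhost_ops,
 * and start the rte_vhost driver for that path.
 */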
int
spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
			const struct spdk_vhost_dev_backend *backend)
{
	char path[PATH_MAX];
	struct stat file_stat;
	struct spdk_cpuset *cpumask;
	int rc;

	assert(vdev);
	if (name == NULL) {
		SPDK_ERRLOG("Can't register controller with no name\n");
		return -EINVAL;
	}

	cpumask = spdk_cpuset_alloc();
	if (!cpumask) {
		SPDK_ERRLOG("spdk_cpuset_alloc failed\n");
		return -ENOMEM;
	}

	if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) {
		SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n",
			    mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask()));
		rc = -EINVAL;
		goto out;
	}

	if (spdk_vhost_dev_find(name)) {
		SPDK_ERRLOG("vhost controller %s already exists.\n", name);
		rc = -EEXIST;
		goto out;
	}

	if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) {
		SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname,
			    name);
		rc = -EINVAL;
		goto out;
	}

	/* Register the vhost driver to handle vhost messages. */
	if (stat(path, &file_stat) != -1) {
		if (!S_ISSOCK(file_stat.st_mode)) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The file already exists and is not a socket.\n",
				    path);
			rc = -EIO;
			goto out;
		} else if (unlink(path) != 0) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The socket already exists and could not be unlinked.\n",
				    path);
			rc = -EIO;
			goto out;
		}
	}

	if (rte_vhost_driver_register(path, 0) != 0) {
		SPDK_ERRLOG("Could not register controller %s with vhost library\n", name);
		SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
		rc = -EIO;
		goto out;
	}
	if (rte_vhost_driver_set_features(path, backend->virtio_features) ||
	    rte_vhost_driver_disable_features(path, backend->disabled_features)) {
		SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name);

		rte_vhost_driver_unregister(path);
		rc = -EIO;
		goto out;
	}

	if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
		rte_vhost_driver_unregister(path);
		SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name);
		rc = -EIO;
		goto out;
	}

	/* The following might start a POSIX thread that polls for incoming
	 * socket connections and calls backend->start/stop_device. These backend
	 * callbacks are also protected by the global SPDK vhost mutex, so we're
	 * safe with not initializing the vdev just yet.
	 */
	if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) {
		SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
			    name, errno, spdk_strerror(errno));
		rte_vhost_driver_unregister(path);
		rc = -EIO;
		goto out;
	}

	vdev->name = strdup(name);
	vdev->path = strdup(path);
	vdev->cpumask = cpumask;
	vdev->registered = true;
	vdev->backend = backend;
	TAILQ_INIT(&vdev->vsessions);
	spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
				  SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);

	TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq);

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name);
	return 0;

out:
	spdk_cpuset_free(cpumask);
	return rc;
}

int
spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev)
{
	if (!TAILQ_EMPTY(&vdev->vsessions)) {
		SPDK_ERRLOG("Controller %s still has a valid connection.\n", vdev->name);
		return -EBUSY;
	}

	if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) {
		SPDK_ERRLOG("Could not unregister controller %s with vhost library\n"
			    "Check if domain socket %s still exists\n",
			    vdev->name, vdev->path);
		return -EIO;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name);

	free(vdev->name);
	free(vdev->path);
	spdk_cpuset_free(vdev->cpumask);
	TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq);
	return 0;
}

static struct spdk_vhost_session *
spdk_vhost_session_next(struct spdk_vhost_dev *vdev, unsigned prev_id)
{
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->id > prev_id) {
			return vsession;
		}
	}

	return NULL;
}

const char *
spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return vdev->name;
}

const struct spdk_cpuset *
spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return vdev->cpumask;
}
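/*
 * Pick the least-loaded core (fewest controllers assigned so far) from the
 * given cpumask. g_num_ctrlrs holds the per-core controller count; it is
 * incremented here and decremented in spdk_vhost_free_reactor().
 */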
uint32_t
spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask)
{
	uint32_t i, selected_core;
	uint32_t min_ctrlrs;

	min_ctrlrs = INT_MAX;
	selected_core = spdk_env_get_first_core();

	SPDK_ENV_FOREACH_CORE(i) {
		if (!spdk_cpuset_get_cpu(cpumask, i)) {
			continue;
		}

		if (g_num_ctrlrs[i] < min_ctrlrs) {
			selected_core = i;
			min_ctrlrs = g_num_ctrlrs[i];
		}
	}

	g_num_ctrlrs[selected_core]++;
	return selected_core;
}

void
spdk_vhost_session_event_done(void *event_ctx, int response)
{
	struct spdk_vhost_session_fn_ctx *ctx = event_ctx;

	ctx->response = response;
	sem_post(&ctx->sem);
}

static void
spdk_vhost_event_cb(void *arg1, void *arg2)
{
	struct spdk_vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession;

	vsession = spdk_vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
	ctx->cb_fn(ctx->vdev, vsession, ctx);
}

static void spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
		struct spdk_vhost_session *vsession,
		spdk_vhost_session_fn fn, void *arg);

static void
spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
{
	struct spdk_vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession = NULL;
	struct spdk_vhost_dev *vdev = ctx->vdev;
	struct spdk_event *ev;
	int rc;

	if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
		ev = spdk_event_allocate(spdk_env_get_current_core(),
					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
		spdk_event_call(ev);
		return;
	}

	vsession = spdk_vhost_session_find_by_id(vdev, ctx->vsession_id);
	if (vsession == NULL) {
		/* The session must have been removed in the meantime, so we
		 * just skip it in our foreach chain
		 */
		goto out_unlock_continue;
	}

	if (vsession->lcore >= 0 &&
	    (uint32_t)vsession->lcore != spdk_env_get_current_core()) {
		/* If the session has been relocated to another core, it is no longer
		 * thread-safe to access its contents here. Even though we're running
		 * under the global vhost mutex, the session itself (and its pollers)
		 * are not. We need to chase the session thread as many times as
		 * necessary.
		 */
		ev = spdk_event_allocate(vsession->lcore,
					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
		spdk_event_call(ev);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	rc = ctx->cb_fn(vdev, vsession, arg2);
	if (rc < 0) {
		goto out_unlock;
	}

out_unlock_continue:
	vsession = spdk_vhost_session_next(vdev, ctx->vsession_id);
	spdk_vhost_external_event_foreach_continue(vdev, vsession, ctx->cb_fn, arg2);
out_unlock:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	free(ctx);
}
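/*
 * Synchronously execute cb_fn on the session's lcore. The caller is
 * expected to hold g_spdk_vhost_mutex; it is released while waiting on the
 * semaphore (with a timeout) and re-acquired before the event's response
 * code is returned.
 */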
int
spdk_vhost_session_send_event(struct spdk_vhost_session *vsession,
			      spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
			      const char *errmsg)
{
	struct spdk_vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_event *ev;
	struct timespec timeout;
	int rc;

	rc = sem_init(&ev_ctx.sem, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n");
		return -errno;
	}

	ev_ctx.vdev = vsession->vdev;
	ev_ctx.vsession_id = vsession->id;
	ev_ctx.cb_fn = cb_fn;
	ev = spdk_event_allocate(vsession->lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
	assert(ev);
	spdk_event_call(ev);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;

	rc = sem_timedwait(&ev_ctx.sem, &timeout);
	if (rc != 0) {
		SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
		sem_wait(&ev_ctx.sem);
	}

	sem_destroy(&ev_ctx.sem);
	pthread_mutex_lock(&g_spdk_vhost_mutex);
	return ev_ctx.response;
}

static int
spdk_vhost_event_async_send_foreach_continue(struct spdk_vhost_session *vsession,
		spdk_vhost_session_fn cb_fn, void *arg)
{
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_session_fn_ctx *ev_ctx;
	struct spdk_event *ev;

	ev_ctx = calloc(1, sizeof(*ev_ctx));
	if (ev_ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc vhost event.\n");
		assert(false);
		return -ENOMEM;
	}

	ev_ctx->vdev = vdev;
	ev_ctx->vsession_id = vsession->id;
	ev_ctx->cb_fn = cb_fn;

	ev = spdk_event_allocate(vsession->lcore,
				 spdk_vhost_event_async_foreach_fn, ev_ctx, arg);
	assert(ev);
	spdk_event_call(ev);

	return 0;
}

static void
stop_device(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	struct rte_vhost_vring *q;
	int rc;
	uint16_t i;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	vdev = vsession->vdev;
	if (vsession->lcore == -1) {
		SPDK_ERRLOG("Controller %s is not loaded.\n", vdev->name);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	rc = vdev->backend->stop_session(vsession);
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	for (i = 0; i < vsession->max_queues; i++) {
		q = &vsession->virtqueue[i].vring;
		if (q->desc == NULL) {
			continue;
		}
		rte_vhost_set_vhost_vring_last_idx(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
	}

	spdk_vhost_session_mem_unregister(vsession);
	free(vsession->mem);
	vsession->lcore = -1;
	assert(vdev->active_session_num > 0);
	vdev->active_session_num--;
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}
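/*
 * rte_vhost new_device callback, invoked on the vhost-user socket thread.
 * It snapshots the negotiated rings, features and guest memory table, then
 * hands the session to the backend's start_session(), which begins polling
 * it on a reactor core.
 */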
static int
start_device(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	int rc = -1;
	uint16_t i;

	pthread_mutex_lock(&g_spdk_vhost_mutex);

	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vsession->lcore != -1) {
		SPDK_ERRLOG("Controller %s already loaded.\n", vdev->name);
		goto out;
	}

	vsession->max_queues = 0;
	memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue));
	for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) {
		if (rte_vhost_get_vhost_vring(vid, i, &vsession->virtqueue[i].vring)) {
			continue;
		}

		if (vsession->virtqueue[i].vring.desc == NULL ||
		    vsession->virtqueue[i].vring.size == 0) {
			continue;
		}

		/* Disable notifications. */
		if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) {
			SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i);
			goto out;
		}

		vsession->max_queues = i + 1;
	}

	if (rte_vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
		goto out;
	}

	if (rte_vhost_get_mem_table(vid, &vsession->mem) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
		goto out;
	}

	for (i = 0; i < vsession->mem->nregions; i++) {
		if (vsession->mem->regions[i].size & MASK_2MB) {
			SPDK_ERRLOG("vhost device %d: Guest memory size is not a 2MB multiple\n", vid);
			free(vsession->mem);
			goto out;
		}
	}

	/*
	 * Not sure right now, but this looks like some kind of QEMU bug: guest IO
	 * might freeze if we don't kick all queues after live migration. It looks
	 * like the previous vhost instance failed to effectively deliver all
	 * interrupts before the GET_VRING_BASE message. This shouldn't harm the
	 * guest, since spurious interrupts should be ignored by the guest virtio
	 * driver.
	 *
	 * Tested on QEMU 2.10.91 and 2.11.50.
	 */
	for (i = 0; i < vsession->max_queues; i++) {
		if (vsession->virtqueue[i].vring.callfd != -1) {
			eventfd_write(vsession->virtqueue[i].vring.callfd, (eventfd_t)1);
		}
	}

	spdk_vhost_session_set_coalescing(vdev, vsession, NULL);
	spdk_vhost_session_mem_register(vsession);
	rc = vdev->backend->start_session(vsession);
	if (rc != 0) {
		spdk_vhost_session_mem_unregister(vsession);
		free(vsession->mem);
		goto out;
	}

	assert(vdev->active_session_num < UINT32_MAX);
	vdev->active_session_num++;
out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

static int
get_config(int vid, uint8_t *config, uint32_t len)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_get_config) {
		rc = vdev->backend->vhost_get_config(vdev, config, len);
	}

out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

static int
set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_set_config) {
		rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags);
	}

out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

int
spdk_vhost_set_socket_path(const char *basename)
{
	int ret;

	if (basename && strlen(basename) > 0) {
		ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename);
		if (ret <= 0) {
			return -EINVAL;
		}
		if ((size_t)ret >= sizeof(dev_dirname) - 2) {
			SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
			return -EINVAL;
		}

		if (dev_dirname[ret - 1] != '/') {
			dev_dirname[ret] = '/';
			dev_dirname[ret + 1] = '\0';
		}
	}

	return 0;
}

static void *
session_shutdown(void *arg)
{
	struct spdk_vhost_dev *vdev = NULL;

	TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
		rte_vhost_driver_unregister(vdev->path);
		vdev->registered = false;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n");
	spdk_event_call((struct spdk_event *)arg);
	return NULL;
}

void
spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	assert(vdev->backend->dump_info_json != NULL);
	vdev->backend->dump_info_json(vdev, w);
}

int
spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev)
{
	if (vdev->pending_async_op_num) {
		return -EBUSY;
	}

	return vdev->backend->remove_device(vdev);
}
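/*
 * rte_vhost new_connection callback: allocate and enqueue a session object
 * for the connecting client. The session is not started here; that happens
 * in start_device() once vhost-user negotiation completes.
 */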
static int
new_connection(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	char ifname[PATH_MAX];

	pthread_mutex_lock(&g_spdk_vhost_mutex);

	if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
		SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	vdev = spdk_vhost_dev_find(ifname);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	/* We expect sessions inside vdev->vsessions to be sorted in ascending
	 * order with regard to vsession->id. For now we always set
	 * id = vsessions_num++ and append each session to the very end of the
	 * vsessions list. This is required for spdk_vhost_dev_foreach_session()
	 * to work.
	 */
	if (vdev->vsessions_num == UINT_MAX) {
		assert(false);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -EINVAL;
	}

	vsession = spdk_dma_zmalloc(sizeof(struct spdk_vhost_session) +
				    vdev->backend->session_ctx_size,
				    SPDK_CACHE_LINE_SIZE, NULL);
	if (vsession == NULL) {
		SPDK_ERRLOG("spdk_dma_zmalloc failed\n");
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	vsession->vdev = vdev;
	vsession->id = vdev->vsessions_num++;
	vsession->vid = vid;
	vsession->lcore = -1;
	vsession->next_stats_check_time = 0;
	vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
					 spdk_get_ticks_hz() / 1000UL;
	TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return 0;
}

static void
destroy_connection(int vid)
{
	struct spdk_vhost_session *vsession;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq);
	spdk_dma_free(vsession);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

static void
spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
		struct spdk_vhost_session *vsession,
		spdk_vhost_session_fn fn, void *arg)
{
	int rc;

	if (vsession == NULL) {
		goto out_finish_foreach;
	}

	while (vsession->lcore == -1) {
		rc = fn(vdev, vsession, arg);
		if (rc < 0) {
			return;
		}
		vsession = spdk_vhost_session_next(vdev, vsession->id);
		if (vsession == NULL) {
			goto out_finish_foreach;
		}
	}

	spdk_vhost_event_async_send_foreach_continue(vsession, fn, arg);
	return;

out_finish_foreach:
	/* there are no more sessions to iterate through, so call the
	 * fn one last time with vsession == NULL
	 */
	assert(vdev->pending_async_op_num > 0);
	vdev->pending_async_op_num--;
	fn(vdev, NULL, arg);
}

void
spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *vdev,
			       spdk_vhost_session_fn fn, void *arg)
{
	struct spdk_vhost_session *vsession = TAILQ_FIRST(&vdev->vsessions);

	assert(vdev->pending_async_op_num < UINT32_MAX);
	vdev->pending_async_op_num++;
	spdk_vhost_external_event_foreach_continue(vdev, vsession, fn, arg);
}

void
spdk_vhost_lock(void)
{
	pthread_mutex_lock(&g_spdk_vhost_mutex);
}

void
spdk_vhost_unlock(void)
{
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}
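/*
 * One-time subsystem initialization: default the socket directory to the
 * current working directory, allocate the per-core controller counters,
 * and construct any controllers defined in the configuration file.
 */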
int
spdk_vhost_init(void)
{
	uint32_t last_core;
	size_t len;
	int ret;

	if (dev_dirname[0] == '\0') {
		if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) {
			SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
			return -1;
		}

		len = strlen(dev_dirname);
		if (dev_dirname[len - 1] != '/') {
			dev_dirname[len] = '/';
			dev_dirname[len + 1] = '\0';
		}
	}

	last_core = spdk_env_get_last_core();
	g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t));
	if (!g_num_ctrlrs) {
		SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n",
			    last_core + 1);
		return -1;
	}

	ret = spdk_vhost_scsi_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost controllers\n");
		return -1;
	}

	ret = spdk_vhost_blk_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost block controllers\n");
		return -1;
	}

	ret = spdk_vhost_nvme_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
		return -1;
	}

	return 0;
}

static void
_spdk_vhost_fini(void *arg1, void *arg2)
{
	spdk_vhost_fini_cb fini_cb = arg1;
	struct spdk_vhost_dev *vdev, *tmp;

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		tmp = spdk_vhost_dev_next(vdev);
		spdk_vhost_dev_remove(vdev);
		/* don't care if it fails, there's nothing we can do for now */
		vdev = tmp;
	}
	spdk_vhost_unlock();

	/* All devices are removed now. */
	free(g_num_ctrlrs);
	fini_cb();
}
void
spdk_vhost_fini(spdk_vhost_fini_cb fini_cb)
{
	pthread_t tid;
	int rc;
	struct spdk_event *fini_ev;

	fini_ev = spdk_event_allocate(spdk_env_get_current_core(), _spdk_vhost_fini, fini_cb, NULL);

	/* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
	 * ops for stopping a device or removing a connection, we need to call it from
	 * a separate thread to avoid deadlock.
	 */
	rc = pthread_create(&tid, NULL, &session_shutdown, fini_ev);
	if (rc != 0) {
		/* pthread_create() returns a positive error number on failure. */
		SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
		abort();
	}
	pthread_detach(tid);
}

void
spdk_vhost_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev)
{
	struct spdk_vhost_dev *vdev;
	uint32_t delay_base_us;
	uint32_t iops_threshold;

	spdk_json_write_array_begin(w);

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		vdev->backend->write_config_json(vdev, w);

		spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold);
		if (delay_base_us) {
			spdk_json_write_object_begin(w);
			spdk_json_write_named_string(w, "method", "set_vhost_controller_coalescing");

			spdk_json_write_named_object_begin(w, "params");
			spdk_json_write_named_string(w, "ctrlr", vdev->name);
			spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us);
			spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold);
			spdk_json_write_object_end(w);

			spdk_json_write_object_end(w);
		}
		vdev = spdk_vhost_dev_next(vdev);
	}
	spdk_vhost_unlock();

	spdk_json_write_array_end(w);
	spdk_event_call(done_ev);
}

SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST)
SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING)