1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "spdk/env.h" 37 #include "spdk/likely.h" 38 #include "spdk/string.h" 39 #include "spdk/util.h" 40 #include "spdk/barrier.h" 41 #include "spdk/vhost.h" 42 #include "vhost_internal.h" 43 44 #include "spdk_internal/memory.h" 45 46 struct vhost_poll_group { 47 struct spdk_thread *thread; 48 TAILQ_ENTRY(vhost_poll_group) tailq; 49 }; 50 51 static TAILQ_HEAD(, vhost_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups); 52 53 static uint32_t *g_num_ctrlrs; 54 55 /* Path to folder where character device will be created. Can be set by user. */ 56 static char dev_dirname[PATH_MAX] = ""; 57 58 static struct spdk_thread *g_fini_thread; 59 static spdk_vhost_fini_cb g_fini_cpl_cb; 60 61 struct spdk_vhost_session_fn_ctx { 62 /** Device pointer obtained before enqueuing the event */ 63 struct spdk_vhost_dev *vdev; 64 65 /** ID of the session to send event to. */ 66 uint32_t vsession_id; 67 68 /** User callback function to be executed on given lcore. */ 69 spdk_vhost_session_fn cb_fn; 70 71 /** Semaphore used to signal that event is done. */ 72 sem_t sem; 73 74 /** Response to be written by enqueued event. */ 75 int response; 76 77 /** Custom user context */ 78 void *user_ctx; 79 }; 80 81 static int new_connection(int vid); 82 static int start_device(int vid); 83 static void stop_device(int vid); 84 static void destroy_connection(int vid); 85 86 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 87 static int get_config(int vid, uint8_t *config, uint32_t len); 88 static int set_config(int vid, uint8_t *config, uint32_t offset, 89 uint32_t size, uint32_t flags); 90 #endif 91 92 const struct vhost_device_ops g_spdk_vhost_ops = { 93 .new_device = start_device, 94 .destroy_device = stop_device, 95 .new_connection = new_connection, 96 .destroy_connection = destroy_connection, 97 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 98 .get_config = get_config, 99 .set_config = set_config, 100 .vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough, 101 .vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call, 102 .vhost_nvme_get_cap = spdk_vhost_nvme_get_cap, 103 .vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr, 104 #endif 105 }; 106 107 static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER( 108 g_spdk_vhost_devices); 109 static pthread_mutex_t g_spdk_vhost_mutex = PTHREAD_MUTEX_INITIALIZER; 110 111 void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len) 112 { 113 void *vva; 114 uint64_t newlen; 115 116 newlen = len; 117 vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen); 118 if (newlen != len) { 119 return NULL; 120 } 121 122 return vva; 123 124 } 125 126 static void 127 spdk_vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue, 128 uint16_t req_id) 129 { 130 struct vring_desc *desc, *desc_table; 131 uint32_t desc_table_size; 132 int rc; 133 134 if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) { 135 return; 136 } 137 138 rc = spdk_vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size); 139 if (spdk_unlikely(rc != 0)) { 140 SPDK_ERRLOG("Can't log used ring descriptors!\n"); 141 return; 142 } 143 144 do { 145 if (spdk_vhost_vring_desc_is_wr(desc)) { 146 /* To be honest, only pages realy touched should be logged, but 147 * doing so would require tracking those changes in each backed. 148 * Also backend most likely will touch all/most of those pages so 149 * for lets assume we touched all pages passed to as writeable buffers. */ 150 rte_vhost_log_write(vsession->vid, desc->addr, desc->len); 151 } 152 spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 153 } while (desc); 154 } 155 156 static void 157 spdk_vhost_log_used_vring_elem(struct spdk_vhost_session *vsession, 158 struct spdk_vhost_virtqueue *virtqueue, 159 uint16_t idx) 160 { 161 uint64_t offset, len; 162 uint16_t vq_idx; 163 164 if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) { 165 return; 166 } 167 168 offset = offsetof(struct vring_used, ring[idx]); 169 len = sizeof(virtqueue->vring.used->ring[idx]); 170 vq_idx = virtqueue - vsession->virtqueue; 171 172 rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len); 173 } 174 175 static void 176 spdk_vhost_log_used_vring_idx(struct spdk_vhost_session *vsession, 177 struct spdk_vhost_virtqueue *virtqueue) 178 { 179 uint64_t offset, len; 180 uint16_t vq_idx; 181 182 if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) { 183 return; 184 } 185 186 offset = offsetof(struct vring_used, idx); 187 len = sizeof(virtqueue->vring.used->idx); 188 vq_idx = virtqueue - vsession->virtqueue; 189 190 rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len); 191 } 192 193 /* 194 * Get available requests from avail ring. 195 */ 196 uint16_t 197 spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs, 198 uint16_t reqs_len) 199 { 200 struct rte_vhost_vring *vring = &virtqueue->vring; 201 struct vring_avail *avail = vring->avail; 202 uint16_t size_mask = vring->size - 1; 203 uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx; 204 uint16_t count, i; 205 206 count = avail_idx - last_idx; 207 if (spdk_likely(count == 0)) { 208 return 0; 209 } 210 211 if (spdk_unlikely(count > vring->size)) { 212 /* TODO: the queue is unrecoverably broken and should be marked so. 213 * For now we will fail silently and report there are no new avail entries. 214 */ 215 return 0; 216 } 217 218 count = spdk_min(count, reqs_len); 219 virtqueue->last_avail_idx += count; 220 for (i = 0; i < count; i++) { 221 reqs[i] = vring->avail->ring[(last_idx + i) & size_mask]; 222 } 223 224 SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, 225 "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n", 226 last_idx, avail_idx, count); 227 228 return count; 229 } 230 231 static bool 232 spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc) 233 { 234 return !!(cur_desc->flags & VRING_DESC_F_INDIRECT); 235 } 236 237 int 238 spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue, 239 uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, 240 uint32_t *desc_table_size) 241 { 242 if (spdk_unlikely(req_idx >= virtqueue->vring.size)) { 243 return -1; 244 } 245 246 *desc = &virtqueue->vring.desc[req_idx]; 247 248 if (spdk_vhost_vring_desc_is_indirect(*desc)) { 249 *desc_table_size = (*desc)->len / sizeof(**desc); 250 *desc_table = spdk_vhost_gpa_to_vva(vsession, (*desc)->addr, 251 sizeof(**desc) * *desc_table_size); 252 *desc = *desc_table; 253 if (*desc == NULL) { 254 return -1; 255 } 256 257 return 0; 258 } 259 260 *desc_table = virtqueue->vring.desc; 261 *desc_table_size = virtqueue->vring.size; 262 263 return 0; 264 } 265 266 int 267 spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession, 268 struct spdk_vhost_virtqueue *virtqueue) 269 { 270 if (virtqueue->used_req_cnt == 0) { 271 return 0; 272 } 273 274 virtqueue->req_cnt += virtqueue->used_req_cnt; 275 virtqueue->used_req_cnt = 0; 276 277 SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, 278 "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n", 279 virtqueue - vsession->virtqueue, virtqueue->last_used_idx); 280 281 eventfd_write(virtqueue->vring.callfd, (eventfd_t)1); 282 return 1; 283 } 284 285 286 static void 287 check_session_io_stats(struct spdk_vhost_session *vsession, uint64_t now) 288 { 289 struct spdk_vhost_virtqueue *virtqueue; 290 uint32_t irq_delay_base = vsession->coalescing_delay_time_base; 291 uint32_t io_threshold = vsession->coalescing_io_rate_threshold; 292 int32_t irq_delay; 293 uint32_t req_cnt; 294 uint16_t q_idx; 295 296 if (now < vsession->next_stats_check_time) { 297 return; 298 } 299 300 vsession->next_stats_check_time = now + vsession->stats_check_interval; 301 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 302 virtqueue = &vsession->virtqueue[q_idx]; 303 304 req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt; 305 if (req_cnt <= io_threshold) { 306 continue; 307 } 308 309 irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold; 310 virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay); 311 312 virtqueue->req_cnt = 0; 313 virtqueue->next_event_time = now; 314 } 315 } 316 317 void 318 spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession) 319 { 320 struct spdk_vhost_virtqueue *virtqueue; 321 uint64_t now; 322 uint16_t q_idx; 323 324 if (vsession->coalescing_delay_time_base == 0) { 325 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 326 virtqueue = &vsession->virtqueue[q_idx]; 327 328 if (virtqueue->vring.desc == NULL || 329 (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 330 continue; 331 } 332 333 spdk_vhost_vq_used_signal(vsession, virtqueue); 334 } 335 } else { 336 now = spdk_get_ticks(); 337 check_session_io_stats(vsession, now); 338 339 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 340 virtqueue = &vsession->virtqueue[q_idx]; 341 342 /* No need for event right now */ 343 if (now < virtqueue->next_event_time || 344 (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 345 continue; 346 } 347 348 if (!spdk_vhost_vq_used_signal(vsession, virtqueue)) { 349 continue; 350 } 351 352 /* Syscall is quite long so update time */ 353 now = spdk_get_ticks(); 354 virtqueue->next_event_time = now + virtqueue->irq_delay_time; 355 } 356 } 357 } 358 359 static int 360 spdk_vhost_session_set_coalescing(struct spdk_vhost_dev *vdev, 361 struct spdk_vhost_session *vsession, void *ctx) 362 { 363 if (vdev == NULL || vsession == NULL) { 364 /* nothing to do */ 365 return 0; 366 } 367 368 vsession->coalescing_delay_time_base = 369 vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL; 370 vsession->coalescing_io_rate_threshold = 371 vdev->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U; 372 return 0; 373 } 374 375 int 376 spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us, 377 uint32_t iops_threshold) 378 { 379 uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL; 380 uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U; 381 382 if (delay_time_base >= UINT32_MAX) { 383 SPDK_ERRLOG("Delay time of %"PRIu32" is to big\n", delay_base_us); 384 return -EINVAL; 385 } else if (io_rate == 0) { 386 SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate, 387 1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS); 388 return -EINVAL; 389 } 390 391 vdev->coalescing_delay_us = delay_base_us; 392 vdev->coalescing_iops_threshold = iops_threshold; 393 394 spdk_vhost_dev_foreach_session(vdev, spdk_vhost_session_set_coalescing, NULL); 395 return 0; 396 } 397 398 void 399 spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us, 400 uint32_t *iops_threshold) 401 { 402 if (delay_base_us) { 403 *delay_base_us = vdev->coalescing_delay_us; 404 } 405 406 if (iops_threshold) { 407 *iops_threshold = vdev->coalescing_iops_threshold; 408 } 409 } 410 411 /* 412 * Enqueue id and len to used ring. 413 */ 414 void 415 spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession, 416 struct spdk_vhost_virtqueue *virtqueue, 417 uint16_t id, uint32_t len) 418 { 419 struct rte_vhost_vring *vring = &virtqueue->vring; 420 struct vring_used *used = vring->used; 421 uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1); 422 423 SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, 424 "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n", 425 virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len); 426 427 spdk_vhost_log_req_desc(vsession, virtqueue, id); 428 429 virtqueue->last_used_idx++; 430 used->ring[last_idx].id = id; 431 used->ring[last_idx].len = len; 432 433 /* Ensure the used ring is updated before we log it or increment used->idx. */ 434 spdk_smp_wmb(); 435 436 spdk_vhost_log_used_vring_elem(vsession, virtqueue, last_idx); 437 * (volatile uint16_t *) &used->idx = virtqueue->last_used_idx; 438 spdk_vhost_log_used_vring_idx(vsession, virtqueue); 439 440 /* Ensure all our used ring changes are visible to the guest at the time 441 * of interrupt. 442 * TODO: this is currently an sfence on x86. For other architectures we 443 * will most likely need an smp_mb(), but smp_mb() is an overkill for x86. 444 */ 445 spdk_wmb(); 446 447 virtqueue->used_req_cnt++; 448 } 449 450 int 451 spdk_vhost_vring_desc_get_next(struct vring_desc **desc, 452 struct vring_desc *desc_table, uint32_t desc_table_size) 453 { 454 struct vring_desc *old_desc = *desc; 455 uint16_t next_idx; 456 457 if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) { 458 *desc = NULL; 459 return 0; 460 } 461 462 next_idx = old_desc->next; 463 if (spdk_unlikely(next_idx >= desc_table_size)) { 464 *desc = NULL; 465 return -1; 466 } 467 468 *desc = &desc_table[next_idx]; 469 return 0; 470 } 471 472 bool 473 spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc) 474 { 475 return !!(cur_desc->flags & VRING_DESC_F_WRITE); 476 } 477 478 int 479 spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 480 uint16_t *iov_index, const struct vring_desc *desc) 481 { 482 uint32_t remaining = desc->len; 483 uint32_t to_boundary; 484 uint32_t len; 485 uintptr_t payload = desc->addr; 486 uintptr_t vva; 487 488 do { 489 if (*iov_index >= SPDK_VHOST_IOVS_MAX) { 490 SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX); 491 return -1; 492 } 493 vva = (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload); 494 if (vva == 0) { 495 SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload); 496 return -1; 497 } 498 to_boundary = VALUE_2MB - _2MB_OFFSET(payload); 499 if (spdk_likely(remaining <= to_boundary)) { 500 len = remaining; 501 } else { 502 /* 503 * Descriptor crosses a 2MB hugepage boundary. vhost memory regions are allocated 504 * from hugepage memory, so this means this descriptor may be described by 505 * discontiguous vhost memory regions. Do not blindly split on the 2MB boundary, 506 * only split it if the two sides of the boundary do not map to the same vhost 507 * memory region. This helps ensure we do not exceed the max number of IOVs 508 * defined by SPDK_VHOST_IOVS_MAX. 509 */ 510 len = to_boundary; 511 while (len < remaining) { 512 if (vva + len != (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload + len)) { 513 break; 514 } 515 len += spdk_min(remaining - len, VALUE_2MB); 516 } 517 } 518 iov[*iov_index].iov_base = (void *)vva; 519 iov[*iov_index].iov_len = len; 520 remaining -= len; 521 payload += len; 522 (*iov_index)++; 523 } while (remaining); 524 525 return 0; 526 } 527 528 static struct spdk_vhost_session * 529 spdk_vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id) 530 { 531 struct spdk_vhost_session *vsession; 532 533 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 534 if (vsession->id == id) { 535 return vsession; 536 } 537 } 538 539 return NULL; 540 } 541 542 struct spdk_vhost_session * 543 spdk_vhost_session_find_by_vid(int vid) 544 { 545 struct spdk_vhost_dev *vdev; 546 struct spdk_vhost_session *vsession; 547 548 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 549 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 550 if (vsession->vid == vid) { 551 return vsession; 552 } 553 } 554 } 555 556 return NULL; 557 } 558 559 #define SHIFT_2MB 21 560 #define SIZE_2MB (1ULL << SHIFT_2MB) 561 #define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB 562 #define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB 563 564 static void 565 spdk_vhost_session_mem_register(struct spdk_vhost_session *vsession) 566 { 567 struct rte_vhost_mem_region *region; 568 uint32_t i; 569 570 for (i = 0; i < vsession->mem->nregions; i++) { 571 uint64_t start, end, len; 572 region = &vsession->mem->regions[i]; 573 start = FLOOR_2MB(region->mmap_addr); 574 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 575 len = end - start; 576 SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", 577 start, len); 578 579 if (spdk_mem_register((void *)start, len) != 0) { 580 SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n", 581 i); 582 continue; 583 } 584 } 585 } 586 587 static void 588 spdk_vhost_session_mem_unregister(struct spdk_vhost_session *vsession) 589 { 590 struct rte_vhost_mem_region *region; 591 uint32_t i; 592 593 for (i = 0; i < vsession->mem->nregions; i++) { 594 uint64_t start, end, len; 595 region = &vsession->mem->regions[i]; 596 start = FLOOR_2MB(region->mmap_addr); 597 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 598 len = end - start; 599 600 if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) { 601 continue; /* region has not been registered */ 602 } 603 604 if (spdk_mem_unregister((void *)start, len) != 0) { 605 assert(false); 606 } 607 } 608 609 } 610 611 void 612 spdk_vhost_free_reactor(uint32_t lcore) 613 { 614 g_num_ctrlrs[lcore]--; 615 } 616 617 struct spdk_vhost_dev * 618 spdk_vhost_dev_next(struct spdk_vhost_dev *vdev) 619 { 620 if (vdev == NULL) { 621 return TAILQ_FIRST(&g_spdk_vhost_devices); 622 } 623 624 return TAILQ_NEXT(vdev, tailq); 625 } 626 627 struct spdk_vhost_dev * 628 spdk_vhost_dev_find(const char *ctrlr_name) 629 { 630 struct spdk_vhost_dev *vdev; 631 size_t dev_dirname_len = strlen(dev_dirname); 632 633 if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { 634 ctrlr_name += dev_dirname_len; 635 } 636 637 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 638 if (strcmp(vdev->name, ctrlr_name) == 0) { 639 return vdev; 640 } 641 } 642 643 return NULL; 644 } 645 646 static int 647 spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) 648 { 649 int rc; 650 651 if (cpumask == NULL) { 652 return -1; 653 } 654 655 if (mask == NULL) { 656 spdk_cpuset_copy(cpumask, spdk_app_get_core_mask()); 657 return 0; 658 } 659 660 rc = spdk_app_parse_core_mask(mask, cpumask); 661 if (rc < 0) { 662 SPDK_ERRLOG("invalid cpumask %s\n", mask); 663 return -1; 664 } 665 666 if (spdk_cpuset_count(cpumask) == 0) { 667 SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n", 668 spdk_cpuset_fmt(spdk_app_get_core_mask())); 669 return -1; 670 } 671 672 return 0; 673 } 674 675 static void * 676 _start_rte_driver(void *arg) 677 { 678 char *path = arg; 679 680 if (rte_vhost_driver_start(path) != 0) { 681 return NULL; 682 } 683 684 return path; 685 } 686 687 int 688 spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, 689 const struct spdk_vhost_dev_backend *backend) 690 { 691 char path[PATH_MAX]; 692 struct stat file_stat; 693 struct spdk_cpuset *cpumask; 694 int rc; 695 696 assert(vdev); 697 if (name == NULL) { 698 SPDK_ERRLOG("Can't register controller with no name\n"); 699 return -EINVAL; 700 } 701 702 cpumask = spdk_cpuset_alloc(); 703 if (!cpumask) { 704 SPDK_ERRLOG("spdk_cpuset_alloc failed\n"); 705 return -ENOMEM; 706 } 707 708 if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) { 709 SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n", 710 mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask())); 711 rc = -EINVAL; 712 goto out; 713 } 714 715 if (spdk_vhost_dev_find(name)) { 716 SPDK_ERRLOG("vhost controller %s already exists.\n", name); 717 rc = -EEXIST; 718 goto out; 719 } 720 721 if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) { 722 SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname, 723 name); 724 rc = -EINVAL; 725 goto out; 726 } 727 728 /* Register vhost driver to handle vhost messages. */ 729 if (stat(path, &file_stat) != -1) { 730 if (!S_ISSOCK(file_stat.st_mode)) { 731 SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " 732 "The file already exists and is not a socket.\n", 733 path); 734 rc = -EIO; 735 goto out; 736 } else if (unlink(path) != 0) { 737 SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " 738 "The socket already exists and failed to unlink.\n", 739 path); 740 rc = -EIO; 741 goto out; 742 } 743 } 744 745 if (rte_vhost_driver_register(path, 0) != 0) { 746 SPDK_ERRLOG("Could not register controller %s with vhost library\n", name); 747 SPDK_ERRLOG("Check if domain socket %s already exists\n", path); 748 rc = -EIO; 749 goto out; 750 } 751 if (rte_vhost_driver_set_features(path, backend->virtio_features) || 752 rte_vhost_driver_disable_features(path, backend->disabled_features)) { 753 SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name); 754 755 rte_vhost_driver_unregister(path); 756 rc = -EIO; 757 goto out; 758 } 759 760 if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) { 761 rte_vhost_driver_unregister(path); 762 SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name); 763 rc = -EIO; 764 goto out; 765 } 766 767 vdev->name = strdup(name); 768 vdev->path = strdup(path); 769 if (vdev->name == NULL || vdev->path == NULL) { 770 free(vdev->name); 771 free(vdev->path); 772 rte_vhost_driver_unregister(path); 773 rc = -EIO; 774 goto out; 775 } 776 777 vdev->cpumask = cpumask; 778 vdev->registered = true; 779 vdev->backend = backend; 780 TAILQ_INIT(&vdev->vsessions); 781 TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq); 782 783 spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US, 784 SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD); 785 786 spdk_vhost_dev_install_rte_compat_hooks(vdev); 787 788 /* The following might start a POSIX thread that polls for incoming 789 * socket connections and calls backend->start/stop_device. These backend 790 * callbacks are also protected by the global SPDK vhost mutex, so we're 791 * safe with not initializing the vdev just yet. 792 */ 793 if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) { 794 SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n", 795 name, errno, spdk_strerror(errno)); 796 rte_vhost_driver_unregister(path); 797 TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); 798 free(vdev->name); 799 free(vdev->path); 800 rc = -EIO; 801 goto out; 802 } 803 804 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name); 805 return 0; 806 807 out: 808 spdk_cpuset_free(cpumask); 809 return rc; 810 } 811 812 int 813 spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev) 814 { 815 if (!TAILQ_EMPTY(&vdev->vsessions)) { 816 SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name); 817 return -EBUSY; 818 } 819 820 if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) { 821 SPDK_ERRLOG("Could not unregister controller %s with vhost library\n" 822 "Check if domain socket %s still exists\n", 823 vdev->name, vdev->path); 824 return -EIO; 825 } 826 827 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name); 828 829 free(vdev->name); 830 free(vdev->path); 831 spdk_cpuset_free(vdev->cpumask); 832 TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); 833 return 0; 834 } 835 836 static struct spdk_vhost_session * 837 spdk_vhost_session_next(struct spdk_vhost_dev *vdev, unsigned prev_id) 838 { 839 struct spdk_vhost_session *vsession; 840 841 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 842 if (vsession->id > prev_id) { 843 return vsession; 844 } 845 } 846 847 return NULL; 848 } 849 850 const char * 851 spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev) 852 { 853 assert(vdev != NULL); 854 return vdev->name; 855 } 856 857 const struct spdk_cpuset * 858 spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev) 859 { 860 assert(vdev != NULL); 861 return vdev->cpumask; 862 } 863 864 uint32_t 865 spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask) 866 { 867 uint32_t i, selected_core; 868 uint32_t min_ctrlrs; 869 870 min_ctrlrs = INT_MAX; 871 selected_core = spdk_env_get_first_core(); 872 873 SPDK_ENV_FOREACH_CORE(i) { 874 if (!spdk_cpuset_get_cpu(cpumask, i)) { 875 continue; 876 } 877 878 if (g_num_ctrlrs[i] < min_ctrlrs) { 879 selected_core = i; 880 min_ctrlrs = g_num_ctrlrs[i]; 881 } 882 } 883 884 g_num_ctrlrs[selected_core]++; 885 return selected_core; 886 } 887 888 static void 889 complete_session_event(struct spdk_vhost_session *vsession, int response) 890 { 891 struct spdk_vhost_session_fn_ctx *ctx = vsession->event_ctx; 892 893 ctx->response = response; 894 sem_post(&ctx->sem); 895 } 896 897 void 898 spdk_vhost_session_start_done(struct spdk_vhost_session *vsession, int response) 899 { 900 if (response == 0) { 901 vsession->started = true; 902 assert(vsession->vdev->active_session_num < UINT32_MAX); 903 vsession->vdev->active_session_num++; 904 } 905 complete_session_event(vsession, response); 906 } 907 908 void 909 spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession, int response) 910 { 911 if (response == 0) { 912 vsession->started = false; 913 assert(vsession->vdev->active_session_num > 0); 914 vsession->vdev->active_session_num--; 915 } 916 complete_session_event(vsession, response); 917 } 918 919 static void 920 spdk_vhost_event_cb(void *arg1, void *arg2) 921 { 922 struct spdk_vhost_session_fn_ctx *ctx = arg1; 923 struct spdk_vhost_session *vsession; 924 struct spdk_event *ev; 925 926 if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) { 927 ev = spdk_event_allocate(spdk_env_get_current_core(), 928 spdk_vhost_event_cb, arg1, NULL); 929 spdk_event_call(ev); 930 return; 931 } 932 933 vsession = spdk_vhost_session_find_by_id(ctx->vdev, ctx->vsession_id); 934 ctx->cb_fn(ctx->vdev, vsession, NULL); 935 pthread_mutex_unlock(&g_spdk_vhost_mutex); 936 } 937 938 static void spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev, 939 struct spdk_vhost_session *vsession, 940 spdk_vhost_session_fn fn, void *arg); 941 942 static void 943 spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2) 944 { 945 struct spdk_vhost_session_fn_ctx *ctx = arg1; 946 struct spdk_vhost_session *vsession = NULL; 947 struct spdk_vhost_dev *vdev = ctx->vdev; 948 struct spdk_event *ev; 949 int rc; 950 951 if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) { 952 ev = spdk_event_allocate(spdk_env_get_current_core(), 953 spdk_vhost_event_async_foreach_fn, arg1, NULL); 954 spdk_event_call(ev); 955 return; 956 } 957 958 vsession = spdk_vhost_session_find_by_id(vdev, ctx->vsession_id); 959 if (vsession == NULL || !vsession->initialized) { 960 /* The session must have been removed in the meantime, so we 961 * just skip it in our foreach chain 962 */ 963 goto out_unlock_continue; 964 } 965 966 if (vsession->started && 967 (uint32_t)vsession->lcore != spdk_env_get_current_core()) { 968 /* if session has been relocated to other core, it is no longer thread-safe 969 * to access its contents here. Even though we're running under the global 970 * vhost mutex, the session itself (and its pollers) are not. We need to chase 971 * the session thread as many times as necessary. 972 */ 973 ev = spdk_event_allocate(vsession->lcore, 974 spdk_vhost_event_async_foreach_fn, arg1, NULL); 975 spdk_event_call(ev); 976 pthread_mutex_unlock(&g_spdk_vhost_mutex); 977 return; 978 } 979 980 rc = ctx->cb_fn(vdev, vsession, ctx->user_ctx); 981 if (rc < 0) { 982 goto out_unlock; 983 } 984 985 out_unlock_continue: 986 vsession = spdk_vhost_session_next(vdev, ctx->vsession_id); 987 spdk_vhost_external_event_foreach_continue(vdev, vsession, ctx->cb_fn, ctx->user_ctx); 988 out_unlock: 989 pthread_mutex_unlock(&g_spdk_vhost_mutex); 990 free(ctx); 991 } 992 993 int 994 spdk_vhost_session_send_event(int32_t lcore, struct spdk_vhost_session *vsession, 995 spdk_vhost_session_fn cb_fn, unsigned timeout_sec, 996 const char *errmsg) 997 { 998 struct spdk_vhost_session_fn_ctx ev_ctx = {0}; 999 struct spdk_event *ev; 1000 struct timespec timeout; 1001 int rc; 1002 1003 rc = sem_init(&ev_ctx.sem, 0, 0); 1004 if (rc != 0) { 1005 SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n"); 1006 return -errno; 1007 } 1008 1009 ev_ctx.vdev = vsession->vdev; 1010 ev_ctx.vsession_id = vsession->id; 1011 ev_ctx.cb_fn = cb_fn; 1012 1013 vsession->lcore = lcore; 1014 vsession->event_ctx = &ev_ctx; 1015 ev = spdk_event_allocate(lcore, spdk_vhost_event_cb, &ev_ctx, NULL); 1016 assert(ev); 1017 spdk_event_call(ev); 1018 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1019 1020 clock_gettime(CLOCK_REALTIME, &timeout); 1021 timeout.tv_sec += timeout_sec; 1022 1023 rc = sem_timedwait(&ev_ctx.sem, &timeout); 1024 if (rc != 0) { 1025 SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg); 1026 sem_wait(&ev_ctx.sem); 1027 } 1028 1029 sem_destroy(&ev_ctx.sem); 1030 pthread_mutex_lock(&g_spdk_vhost_mutex); 1031 vsession->event_ctx = NULL; 1032 return ev_ctx.response; 1033 } 1034 1035 static int 1036 spdk_vhost_event_async_send_foreach_continue(struct spdk_vhost_session *vsession, 1037 spdk_vhost_session_fn cb_fn, void *arg) 1038 { 1039 struct spdk_vhost_dev *vdev = vsession->vdev; 1040 struct spdk_vhost_session_fn_ctx *ev_ctx; 1041 struct spdk_event *ev; 1042 1043 ev_ctx = calloc(1, sizeof(*ev_ctx)); 1044 if (ev_ctx == NULL) { 1045 SPDK_ERRLOG("Failed to alloc vhost event.\n"); 1046 assert(false); 1047 return -ENOMEM; 1048 } 1049 1050 ev_ctx->vdev = vdev; 1051 ev_ctx->vsession_id = vsession->id; 1052 ev_ctx->cb_fn = cb_fn; 1053 ev_ctx->user_ctx = arg; 1054 1055 ev = spdk_event_allocate(vsession->lcore, 1056 spdk_vhost_event_async_foreach_fn, ev_ctx, NULL); 1057 assert(ev); 1058 spdk_event_call(ev); 1059 1060 return 0; 1061 } 1062 1063 static void 1064 _stop_session(struct spdk_vhost_session *vsession) 1065 { 1066 struct spdk_vhost_dev *vdev = vsession->vdev; 1067 struct spdk_vhost_virtqueue *q; 1068 int rc; 1069 uint16_t i; 1070 1071 rc = vdev->backend->stop_session(vsession); 1072 if (rc != 0) { 1073 SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid); 1074 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1075 return; 1076 } 1077 1078 for (i = 0; i < vsession->max_queues; i++) { 1079 q = &vsession->virtqueue[i]; 1080 if (q->vring.desc == NULL) { 1081 continue; 1082 } 1083 rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx); 1084 } 1085 1086 spdk_vhost_session_mem_unregister(vsession); 1087 free(vsession->mem); 1088 } 1089 1090 static void 1091 stop_device(int vid) 1092 { 1093 struct spdk_vhost_session *vsession; 1094 1095 pthread_mutex_lock(&g_spdk_vhost_mutex); 1096 vsession = spdk_vhost_session_find_by_vid(vid); 1097 if (vsession == NULL) { 1098 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1099 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1100 return; 1101 } 1102 1103 if (!vsession->started) { 1104 /* already stopped, nothing to do */ 1105 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1106 return; 1107 } 1108 1109 _stop_session(vsession); 1110 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1111 } 1112 1113 static int 1114 start_device(int vid) 1115 { 1116 struct spdk_vhost_dev *vdev; 1117 struct spdk_vhost_session *vsession; 1118 int rc = -1; 1119 uint16_t i; 1120 1121 pthread_mutex_lock(&g_spdk_vhost_mutex); 1122 1123 vsession = spdk_vhost_session_find_by_vid(vid); 1124 if (vsession == NULL) { 1125 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1126 goto out; 1127 } 1128 1129 vdev = vsession->vdev; 1130 if (vsession->started) { 1131 /* already started, nothing to do */ 1132 rc = 0; 1133 goto out; 1134 } 1135 1136 vsession->max_queues = 0; 1137 memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue)); 1138 for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) { 1139 struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i]; 1140 1141 if (rte_vhost_get_vhost_vring(vid, i, &q->vring)) { 1142 continue; 1143 } 1144 1145 if (q->vring.desc == NULL || q->vring.size == 0) { 1146 continue; 1147 } 1148 1149 if (rte_vhost_get_vring_base(vsession->vid, i, &q->last_avail_idx, &q->last_used_idx)) { 1150 q->vring.desc = NULL; 1151 continue; 1152 } 1153 1154 /* Disable notifications. */ 1155 if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) { 1156 SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i); 1157 goto out; 1158 } 1159 1160 vsession->max_queues = i + 1; 1161 } 1162 1163 if (rte_vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) { 1164 SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid); 1165 goto out; 1166 } 1167 1168 if (rte_vhost_get_mem_table(vid, &vsession->mem) != 0) { 1169 SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid); 1170 goto out; 1171 } 1172 1173 for (i = 0; i < vsession->mem->nregions; i++) { 1174 uint64_t mmap_size = vsession->mem->regions[i].mmap_size; 1175 1176 if (mmap_size & MASK_2MB) { 1177 SPDK_ERRLOG("vhost device %d: Guest mmaped memory size %" PRIx64 1178 " is not a 2MB multiple\n", vid, mmap_size); 1179 free(vsession->mem); 1180 goto out; 1181 } 1182 } 1183 1184 /* 1185 * Not sure right now but this look like some kind of QEMU bug and guest IO 1186 * might be frozed without kicking all queues after live-migration. This look like 1187 * the previous vhost instance failed to effectively deliver all interrupts before 1188 * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts 1189 * should be ignored by guest virtio driver. 1190 * 1191 * Tested on QEMU 2.10.91 and 2.11.50. 1192 */ 1193 for (i = 0; i < vsession->max_queues; i++) { 1194 if (vsession->virtqueue[i].vring.callfd != -1) { 1195 eventfd_write(vsession->virtqueue[i].vring.callfd, (eventfd_t)1); 1196 } 1197 } 1198 1199 spdk_vhost_session_set_coalescing(vdev, vsession, NULL); 1200 spdk_vhost_session_mem_register(vsession); 1201 vsession->initialized = true; 1202 rc = vdev->backend->start_session(vsession); 1203 if (rc != 0) { 1204 spdk_vhost_session_mem_unregister(vsession); 1205 free(vsession->mem); 1206 goto out; 1207 } 1208 1209 out: 1210 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1211 return rc; 1212 } 1213 1214 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 1215 static int 1216 get_config(int vid, uint8_t *config, uint32_t len) 1217 { 1218 struct spdk_vhost_session *vsession; 1219 struct spdk_vhost_dev *vdev; 1220 int rc = -1; 1221 1222 pthread_mutex_lock(&g_spdk_vhost_mutex); 1223 vsession = spdk_vhost_session_find_by_vid(vid); 1224 if (vsession == NULL) { 1225 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1226 goto out; 1227 } 1228 1229 vdev = vsession->vdev; 1230 if (vdev->backend->vhost_get_config) { 1231 rc = vdev->backend->vhost_get_config(vdev, config, len); 1232 } 1233 1234 out: 1235 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1236 return rc; 1237 } 1238 1239 static int 1240 set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags) 1241 { 1242 struct spdk_vhost_session *vsession; 1243 struct spdk_vhost_dev *vdev; 1244 int rc = -1; 1245 1246 pthread_mutex_lock(&g_spdk_vhost_mutex); 1247 vsession = spdk_vhost_session_find_by_vid(vid); 1248 if (vsession == NULL) { 1249 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1250 goto out; 1251 } 1252 1253 vdev = vsession->vdev; 1254 if (vdev->backend->vhost_set_config) { 1255 rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags); 1256 } 1257 1258 out: 1259 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1260 return rc; 1261 } 1262 #endif 1263 1264 int 1265 spdk_vhost_set_socket_path(const char *basename) 1266 { 1267 int ret; 1268 1269 if (basename && strlen(basename) > 0) { 1270 ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename); 1271 if (ret <= 0) { 1272 return -EINVAL; 1273 } 1274 if ((size_t)ret >= sizeof(dev_dirname) - 2) { 1275 SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret); 1276 return -EINVAL; 1277 } 1278 1279 if (dev_dirname[ret - 1] != '/') { 1280 dev_dirname[ret] = '/'; 1281 dev_dirname[ret + 1] = '\0'; 1282 } 1283 } 1284 1285 return 0; 1286 } 1287 1288 void 1289 spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1290 { 1291 assert(vdev->backend->dump_info_json != NULL); 1292 vdev->backend->dump_info_json(vdev, w); 1293 } 1294 1295 int 1296 spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev) 1297 { 1298 if (vdev->pending_async_op_num) { 1299 return -EBUSY; 1300 } 1301 1302 return vdev->backend->remove_device(vdev); 1303 } 1304 1305 static int 1306 new_connection(int vid) 1307 { 1308 struct spdk_vhost_dev *vdev; 1309 struct spdk_vhost_session *vsession; 1310 char ifname[PATH_MAX]; 1311 1312 pthread_mutex_lock(&g_spdk_vhost_mutex); 1313 1314 if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) { 1315 SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid); 1316 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1317 return -1; 1318 } 1319 1320 vdev = spdk_vhost_dev_find(ifname); 1321 if (vdev == NULL) { 1322 SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid); 1323 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1324 return -1; 1325 } 1326 1327 /* We expect sessions inside vdev->vsessions to be sorted in ascending 1328 * order in regard of vsession->id. For now we always set id = vsessions_cnt++ 1329 * and append each session to the very end of the vsessions list. 1330 * This is required for spdk_vhost_dev_foreach_session() to work. 1331 */ 1332 if (vdev->vsessions_num == UINT_MAX) { 1333 assert(false); 1334 return -EINVAL; 1335 } 1336 1337 if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) + 1338 vdev->backend->session_ctx_size)) { 1339 SPDK_ERRLOG("vsession alloc failed\n"); 1340 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1341 return -1; 1342 } 1343 memset(vsession, 0, sizeof(*vsession) + vdev->backend->session_ctx_size); 1344 1345 vsession->vdev = vdev; 1346 vsession->id = vdev->vsessions_num++; 1347 vsession->vid = vid; 1348 vsession->lcore = -1; 1349 vsession->started = false; 1350 vsession->initialized = false; 1351 vsession->next_stats_check_time = 0; 1352 vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS * 1353 spdk_get_ticks_hz() / 1000UL; 1354 TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq); 1355 1356 spdk_vhost_session_install_rte_compat_hooks(vsession); 1357 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1358 return 0; 1359 } 1360 1361 static void 1362 destroy_connection(int vid) 1363 { 1364 struct spdk_vhost_session *vsession; 1365 1366 pthread_mutex_lock(&g_spdk_vhost_mutex); 1367 vsession = spdk_vhost_session_find_by_vid(vid); 1368 if (vsession == NULL) { 1369 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1370 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1371 return; 1372 } 1373 1374 if (vsession->started) { 1375 _stop_session(vsession); 1376 } 1377 1378 TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq); 1379 free(vsession); 1380 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1381 } 1382 1383 static void 1384 spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev, 1385 struct spdk_vhost_session *vsession, 1386 spdk_vhost_session_fn fn, void *arg) 1387 { 1388 int rc; 1389 1390 if (vsession == NULL) { 1391 goto out_finish_foreach; 1392 } 1393 1394 while (!vsession->started) { 1395 if (vsession->initialized) { 1396 rc = fn(vdev, vsession, arg); 1397 if (rc < 0) { 1398 return; 1399 } 1400 } 1401 1402 vsession = spdk_vhost_session_next(vdev, vsession->id); 1403 if (vsession == NULL) { 1404 goto out_finish_foreach; 1405 } 1406 } 1407 1408 spdk_vhost_event_async_send_foreach_continue(vsession, fn, arg); 1409 return; 1410 1411 out_finish_foreach: 1412 /* there are no more sessions to iterate through, so call the 1413 * fn one last time with vsession == NULL 1414 */ 1415 assert(vdev->pending_async_op_num > 0); 1416 vdev->pending_async_op_num--; 1417 fn(vdev, NULL, arg); 1418 } 1419 1420 void 1421 spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *vdev, 1422 spdk_vhost_session_fn fn, void *arg) 1423 { 1424 struct spdk_vhost_session *vsession = TAILQ_FIRST(&vdev->vsessions); 1425 1426 assert(vdev->pending_async_op_num < UINT32_MAX); 1427 vdev->pending_async_op_num++; 1428 spdk_vhost_external_event_foreach_continue(vdev, vsession, fn, arg); 1429 } 1430 1431 void 1432 spdk_vhost_lock(void) 1433 { 1434 pthread_mutex_lock(&g_spdk_vhost_mutex); 1435 } 1436 1437 int 1438 spdk_vhost_trylock(void) 1439 { 1440 return -pthread_mutex_trylock(&g_spdk_vhost_mutex); 1441 } 1442 1443 void 1444 spdk_vhost_unlock(void) 1445 { 1446 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1447 } 1448 1449 static void 1450 vhost_create_poll_group_done(void *ctx) 1451 { 1452 spdk_vhost_init_cb init_cb = ctx; 1453 int ret; 1454 1455 if (TAILQ_EMPTY(&g_poll_groups)) { 1456 /* No threads? Iteration failed? */ 1457 init_cb(-ECHILD); 1458 return; 1459 } 1460 1461 ret = spdk_vhost_scsi_controller_construct(); 1462 if (ret != 0) { 1463 SPDK_ERRLOG("Cannot construct vhost controllers\n"); 1464 goto out; 1465 } 1466 1467 ret = spdk_vhost_blk_controller_construct(); 1468 if (ret != 0) { 1469 SPDK_ERRLOG("Cannot construct vhost block controllers\n"); 1470 goto out; 1471 } 1472 1473 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 1474 ret = spdk_vhost_nvme_controller_construct(); 1475 if (ret != 0) { 1476 SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n"); 1477 goto out; 1478 } 1479 #endif 1480 1481 out: 1482 init_cb(ret); 1483 } 1484 1485 static void 1486 vhost_create_poll_group(void *ctx) 1487 { 1488 struct vhost_poll_group *pg; 1489 1490 pg = calloc(1, sizeof(*pg)); 1491 if (!pg) { 1492 SPDK_ERRLOG("Not enough memory to allocate poll groups\n"); 1493 spdk_app_stop(-ENOMEM); 1494 return; 1495 } 1496 1497 pg->thread = spdk_get_thread(); 1498 TAILQ_INSERT_TAIL(&g_poll_groups, pg, tailq); 1499 } 1500 1501 void 1502 spdk_vhost_init(spdk_vhost_init_cb init_cb) 1503 { 1504 uint32_t last_core; 1505 size_t len; 1506 int ret; 1507 1508 if (dev_dirname[0] == '\0') { 1509 if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) { 1510 SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno)); 1511 ret = -1; 1512 goto err_out; 1513 } 1514 1515 len = strlen(dev_dirname); 1516 if (dev_dirname[len - 1] != '/') { 1517 dev_dirname[len] = '/'; 1518 dev_dirname[len + 1] = '\0'; 1519 } 1520 } 1521 1522 last_core = spdk_env_get_last_core(); 1523 g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t)); 1524 if (!g_num_ctrlrs) { 1525 SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n", 1526 last_core + 1); 1527 ret = -1; 1528 goto err_out; 1529 } 1530 1531 spdk_for_each_thread(vhost_create_poll_group, 1532 init_cb, 1533 vhost_create_poll_group_done); 1534 return; 1535 err_out: 1536 init_cb(ret); 1537 } 1538 1539 static void 1540 _spdk_vhost_fini(void *arg1) 1541 { 1542 struct spdk_vhost_dev *vdev, *tmp; 1543 struct vhost_poll_group *pg, *tpg; 1544 1545 spdk_vhost_lock(); 1546 vdev = spdk_vhost_dev_next(NULL); 1547 while (vdev != NULL) { 1548 tmp = spdk_vhost_dev_next(vdev); 1549 spdk_vhost_dev_remove(vdev); 1550 /* don't care if it fails, there's nothing we can do for now */ 1551 vdev = tmp; 1552 } 1553 spdk_vhost_unlock(); 1554 1555 /* All devices are removed now. */ 1556 free(g_num_ctrlrs); 1557 TAILQ_FOREACH_SAFE(pg, &g_poll_groups, tailq, tpg) { 1558 TAILQ_REMOVE(&g_poll_groups, pg, tailq); 1559 free(pg); 1560 } 1561 g_fini_cpl_cb(); 1562 } 1563 1564 static void * 1565 session_shutdown(void *arg) 1566 { 1567 struct spdk_vhost_dev *vdev = NULL; 1568 1569 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 1570 rte_vhost_driver_unregister(vdev->path); 1571 vdev->registered = false; 1572 } 1573 1574 SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n"); 1575 spdk_thread_send_msg(g_fini_thread, _spdk_vhost_fini, NULL); 1576 return NULL; 1577 } 1578 1579 void 1580 spdk_vhost_fini(spdk_vhost_fini_cb fini_cb) 1581 { 1582 pthread_t tid; 1583 int rc; 1584 1585 g_fini_thread = spdk_get_thread(); 1586 g_fini_cpl_cb = fini_cb; 1587 1588 /* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK 1589 * ops for stopping a device or removing a connection, we need to call it from 1590 * a separate thread to avoid deadlock. 1591 */ 1592 rc = pthread_create(&tid, NULL, &session_shutdown, NULL); 1593 if (rc < 0) { 1594 SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc)); 1595 abort(); 1596 } 1597 pthread_detach(tid); 1598 } 1599 1600 void 1601 spdk_vhost_config_json(struct spdk_json_write_ctx *w) 1602 { 1603 struct spdk_vhost_dev *vdev; 1604 uint32_t delay_base_us; 1605 uint32_t iops_threshold; 1606 1607 spdk_json_write_array_begin(w); 1608 1609 spdk_vhost_lock(); 1610 vdev = spdk_vhost_dev_next(NULL); 1611 while (vdev != NULL) { 1612 vdev->backend->write_config_json(vdev, w); 1613 1614 spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold); 1615 if (delay_base_us) { 1616 spdk_json_write_object_begin(w); 1617 spdk_json_write_named_string(w, "method", "set_vhost_controller_coalescing"); 1618 1619 spdk_json_write_named_object_begin(w, "params"); 1620 spdk_json_write_named_string(w, "ctrlr", vdev->name); 1621 spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us); 1622 spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold); 1623 spdk_json_write_object_end(w); 1624 1625 spdk_json_write_object_end(w); 1626 } 1627 vdev = spdk_vhost_dev_next(vdev); 1628 } 1629 spdk_vhost_unlock(); 1630 1631 spdk_json_write_array_end(w); 1632 } 1633 1634 SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST) 1635 SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING) 1636