1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"

#include "spdk_internal/memory.h"

/* Per-lcore count of vhost controllers assigned to that core; used by
 * spdk_vhost_allocate_reactor()/spdk_vhost_free_reactor() to balance
 * controllers across reactors. */
static uint32_t *g_num_ctrlrs;

/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";

/* Context for an event dispatched to a session's reactor via
 * spdk_vhost_session_send_event() or the foreach-session machinery. */
struct spdk_vhost_session_fn_ctx {
	/** Device pointer obtained before enqueuing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the session to send event to. */
	uint32_t vsession_id;

	/** User callback function to be executed on given lcore. */
	spdk_vhost_session_fn cb_fn;

	/** Semaphore used to signal that event is done. */
	sem_t sem;

	/** Response to be written by enqueued event. */
	int response;
};

static int new_connection(int vid);
static int start_device(int vid);
static void stop_device(int vid);
static void destroy_connection(int vid);

#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
static int get_config(int vid, uint8_t *config, uint32_t len);
static int set_config(int vid, uint8_t *config, uint32_t offset,
		      uint32_t size, uint32_t flags);
#endif

/* Callbacks registered with the rte_vhost library; invoked from the
 * vhost-user socket polling thread. */
const struct vhost_device_ops g_spdk_vhost_ops = {
	.new_device =  start_device,
	.destroy_device = stop_device,
	.new_connection = new_connection,
	.destroy_connection = destroy_connection,
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
	.get_config = get_config,
	.set_config = set_config,
	.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
	.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
	.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
	.vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr,
#endif
};

/* Global list of registered vhost devices, protected by g_spdk_vhost_mutex. */
static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
			g_spdk_vhost_devices);
static pthread_mutex_t g_spdk_vhost_mutex =
PTHREAD_MUTEX_INITIALIZER;

/* Translate a guest physical address range into a host virtual address.
 * Returns NULL unless the whole [addr, addr + len) range lies within a
 * single contiguous mapped region. */
void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
{
	void *vva;
	uint64_t newlen;

	newlen = len;
	vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
	/* rte_vhost_va_from_guest_pa() shrinks newlen to the contiguously
	 * mapped size - a shorter length means the range is split across
	 * regions and must be rejected. */
	if (newlen != len) {
		return NULL;
	}

	return vva;

}

/* Mark all writable descriptors of request req_id as dirty in the vhost
 * live-migration log. No-op unless VHOST_F_LOG_ALL was negotiated. */
static void
spdk_vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
			uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = spdk_vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (spdk_vhost_vring_desc_is_wr(desc)) {
			/* To be honest, only pages really touched should be logged, but
			 * doing so would require tracking those changes in each backend.
			 * Also the backend will most likely touch all/most of those pages,
			 * so let's assume we touched all pages passed to us as writeable
			 * buffers. */
			rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
		}
		spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}

/* Log one used-ring element we just wrote, for live migration. */
static void
spdk_vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
			       struct spdk_vhost_virtqueue *virtqueue,
			       uint16_t idx)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, ring[idx]);
	len = sizeof(virtqueue->vring.used->ring[idx]);
	/* Queue number is recovered from the virtqueue's position in the
	 * session's virtqueue array. */
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}

/* Log the used-ring index update, for live migration. */
static void
spdk_vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
			      struct spdk_vhost_virtqueue *virtqueue)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, idx);
	len = sizeof(virtqueue->vring.used->idx);
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}

/*
 * Get available requests from avail ring.
 * Copies up to reqs_len new descriptor head indices into reqs[] and
 * advances last_avail_idx; returns the number of requests fetched.
 */
uint16_t
spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
			     uint16_t reqs_len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_avail *avail = vring->avail;
	uint16_t size_mask = vring->size - 1;
	uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
	uint16_t count, i;

	/* uint16_t subtraction handles avail->idx wraparound naturally. */
	count = avail_idx - last_idx;
	if (spdk_likely(count == 0)) {
		return 0;
	}

	if (spdk_unlikely(count > vring->size)) {
		/* TODO: the queue is unrecoverably broken and should be marked so.
		 * For now we will fail silently and report there are no new avail entries. */
		return 0;
	}

	count = spdk_min(count, reqs_len);
	virtqueue->last_avail_idx += count;
	for (i = 0; i < count; i++) {
		/* Ring size is a power of two, so masking performs the modulo. */
		reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      last_idx, avail_idx, count);

	return count;
}

static bool
spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}

/* Resolve the descriptor chain for request req_idx. For an indirect
 * descriptor, desc/desc_table point into the guest-provided indirect
 * table (translated to host VA); otherwise they point into the vring's
 * own descriptor array. Returns 0 on success, -1 on invalid index or
 * untranslatable indirect table. */
int
spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		       uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
		       uint32_t *desc_table_size)
{
	if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
		return -1;
	}

	*desc = &virtqueue->vring.desc[req_idx];

	if (spdk_vhost_vring_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(**desc);
		*desc_table = spdk_vhost_gpa_to_vva(vsession, (*desc)->addr,
						    sizeof(**desc) * *desc_table_size);
		/* The chain starts at the first entry of the indirect table. */
		*desc = *desc_table;
		if (*desc == NULL) {
			return -1;
		}

		return 0;
	}

	*desc_table = virtqueue->vring.desc;
	*desc_table_size = virtqueue->vring.size;

	return 0;
}

/* Signal the guest (via the vring's call eventfd) if any completed
 * requests are pending on this virtqueue. Returns 1 if an interrupt
 * was sent, 0 otherwise. */
int
spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession,
			  struct spdk_vhost_virtqueue *virtqueue)
{
	if (virtqueue->used_req_cnt == 0) {
		return 0;
	}

	/* Fold the batch into the running counter used by the coalescing
	 * statistics in check_session_io_stats(). */
	virtqueue->req_cnt += virtqueue->used_req_cnt;
	virtqueue->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx);

	eventfd_write(virtqueue->vring.callfd, (eventfd_t)1);
	return 1;
}


/* Periodically recompute each virtqueue's interrupt delay from its
 * recent I/O rate: the busier the queue (relative to the configured
 * IOPS threshold), the longer interrupts are delayed. */
static void
check_session_io_stats(struct spdk_vhost_session *vsession, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
	uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
	int32_t irq_delay;
	uint32_t req_cnt;
	uint16_t q_idx;

	if (now < vsession->next_stats_check_time) {
		return;
	}

	vsession->next_stats_check_time = now + vsession->stats_check_interval;
	for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
		virtqueue = &vsession->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		/* Delay scales linearly with how far the rate exceeds the threshold. */
		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}

/* Send coalesced used-ring interrupts for all virtqueues of a session.
 * With coalescing disabled, signals every queue immediately; otherwise
 * signals only queues whose delay window has elapsed. Queues with
 * VRING_AVAIL_F_NO_INTERRUPT set are always skipped. */
void
spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vsession->coalescing_delay_time_base == 0) {
		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			if (virtqueue->vring.desc == NULL ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			spdk_vhost_vq_used_signal(vsession, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		check_session_io_stats(vsession, now);

		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			/* No need for event right now */
			if (now < virtqueue->next_event_time ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			if (!spdk_vhost_vq_used_signal(vsession, virtqueue)) {
				continue;
			}

			/* Syscall is quite long so update time */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}

static int
spdk_vhost_session_set_coalescing(struct spdk_vhost_dev *vdev, 348 struct spdk_vhost_session *vsession, void *ctx) 349 { 350 if (vdev == NULL || vsession == NULL) { 351 /* nothing to do */ 352 return 0; 353 } 354 355 vsession->coalescing_delay_time_base = 356 vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL; 357 vsession->coalescing_io_rate_threshold = 358 vdev->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U; 359 return 0; 360 } 361 362 int 363 spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us, 364 uint32_t iops_threshold) 365 { 366 uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL; 367 uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U; 368 369 if (delay_time_base >= UINT32_MAX) { 370 SPDK_ERRLOG("Delay time of %"PRIu32" is to big\n", delay_base_us); 371 return -EINVAL; 372 } else if (io_rate == 0) { 373 SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate, 374 1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS); 375 return -EINVAL; 376 } 377 378 vdev->coalescing_delay_us = delay_base_us; 379 vdev->coalescing_iops_threshold = iops_threshold; 380 381 spdk_vhost_dev_foreach_session(vdev, spdk_vhost_session_set_coalescing, NULL); 382 return 0; 383 } 384 385 void 386 spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us, 387 uint32_t *iops_threshold) 388 { 389 if (delay_base_us) { 390 *delay_base_us = vdev->coalescing_delay_us; 391 } 392 393 if (iops_threshold) { 394 *iops_threshold = vdev->coalescing_iops_threshold; 395 } 396 } 397 398 /* 399 * Enqueue id and len to used ring. 
400 */ 401 void 402 spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession, 403 struct spdk_vhost_virtqueue *virtqueue, 404 uint16_t id, uint32_t len) 405 { 406 struct rte_vhost_vring *vring = &virtqueue->vring; 407 struct vring_used *used = vring->used; 408 uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1); 409 410 SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, 411 "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n", 412 virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len); 413 414 spdk_vhost_log_req_desc(vsession, virtqueue, id); 415 416 virtqueue->last_used_idx++; 417 used->ring[last_idx].id = id; 418 used->ring[last_idx].len = len; 419 420 /* Ensure the used ring is updated before we log it or increment used->idx. */ 421 spdk_smp_wmb(); 422 423 spdk_vhost_log_used_vring_elem(vsession, virtqueue, last_idx); 424 * (volatile uint16_t *) &used->idx = virtqueue->last_used_idx; 425 spdk_vhost_log_used_vring_idx(vsession, virtqueue); 426 427 /* Ensure all our used ring changes are visible to the guest at the time 428 * of interrupt. 429 * TODO: this is currently an sfence on x86. For other architectures we 430 * will most likely need an smp_mb(), but smp_mb() is an overkill for x86. 
431 */ 432 spdk_wmb(); 433 434 virtqueue->used_req_cnt++; 435 } 436 437 int 438 spdk_vhost_vring_desc_get_next(struct vring_desc **desc, 439 struct vring_desc *desc_table, uint32_t desc_table_size) 440 { 441 struct vring_desc *old_desc = *desc; 442 uint16_t next_idx; 443 444 if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) { 445 *desc = NULL; 446 return 0; 447 } 448 449 next_idx = old_desc->next; 450 if (spdk_unlikely(next_idx >= desc_table_size)) { 451 *desc = NULL; 452 return -1; 453 } 454 455 *desc = &desc_table[next_idx]; 456 return 0; 457 } 458 459 bool 460 spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc) 461 { 462 return !!(cur_desc->flags & VRING_DESC_F_WRITE); 463 } 464 465 int 466 spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov, 467 uint16_t *iov_index, const struct vring_desc *desc) 468 { 469 uint32_t remaining = desc->len; 470 uint32_t to_boundary; 471 uint32_t len; 472 uintptr_t payload = desc->addr; 473 uintptr_t vva; 474 475 do { 476 if (*iov_index >= SPDK_VHOST_IOVS_MAX) { 477 SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX); 478 return -1; 479 } 480 vva = (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload); 481 if (vva == 0) { 482 SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload); 483 return -1; 484 } 485 to_boundary = VALUE_2MB - _2MB_OFFSET(payload); 486 if (spdk_likely(remaining <= to_boundary)) { 487 len = remaining; 488 } else { 489 /* 490 * Descriptor crosses a 2MB hugepage boundary. vhost memory regions are allocated 491 * from hugepage memory, so this means this descriptor may be described by 492 * discontiguous vhost memory regions. Do not blindly split on the 2MB boundary, 493 * only split it if the two sides of the boundary do not map to the same vhost 494 * memory region. This helps ensure we do not exceed the max number of IOVs 495 * defined by SPDK_VHOST_IOVS_MAX. 
496 */ 497 len = to_boundary; 498 while (len < remaining) { 499 if (vva + len != (uintptr_t)rte_vhost_gpa_to_vva(vsession->mem, payload + len)) { 500 break; 501 } 502 len += spdk_min(remaining - len, VALUE_2MB); 503 } 504 } 505 iov[*iov_index].iov_base = (void *)vva; 506 iov[*iov_index].iov_len = len; 507 remaining -= len; 508 payload += len; 509 (*iov_index)++; 510 } while (remaining); 511 512 return 0; 513 } 514 515 static struct spdk_vhost_session * 516 spdk_vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id) 517 { 518 struct spdk_vhost_session *vsession; 519 520 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 521 if (vsession->id == id) { 522 return vsession; 523 } 524 } 525 526 return NULL; 527 } 528 529 struct spdk_vhost_session * 530 spdk_vhost_session_find_by_vid(int vid) 531 { 532 struct spdk_vhost_dev *vdev; 533 struct spdk_vhost_session *vsession; 534 535 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 536 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 537 if (vsession->vid == vid) { 538 return vsession; 539 } 540 } 541 } 542 543 return NULL; 544 } 545 546 #define SHIFT_2MB 21 547 #define SIZE_2MB (1ULL << SHIFT_2MB) 548 #define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB 549 #define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB 550 551 static void 552 spdk_vhost_session_mem_register(struct spdk_vhost_session *vsession) 553 { 554 struct rte_vhost_mem_region *region; 555 uint32_t i; 556 557 for (i = 0; i < vsession->mem->nregions; i++) { 558 uint64_t start, end, len; 559 region = &vsession->mem->regions[i]; 560 start = FLOOR_2MB(region->mmap_addr); 561 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 562 len = end - start; 563 SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", 564 start, len); 565 566 if (spdk_mem_register((void *)start, len) != 0) { 567 SPDK_WARNLOG("Failed to register memory region %"PRIu32". 
Future vtophys translation might fail.\n", 568 i); 569 continue; 570 } 571 } 572 } 573 574 static void 575 spdk_vhost_session_mem_unregister(struct spdk_vhost_session *vsession) 576 { 577 struct rte_vhost_mem_region *region; 578 uint32_t i; 579 580 for (i = 0; i < vsession->mem->nregions; i++) { 581 uint64_t start, end, len; 582 region = &vsession->mem->regions[i]; 583 start = FLOOR_2MB(region->mmap_addr); 584 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 585 len = end - start; 586 587 if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) { 588 continue; /* region has not been registered */ 589 } 590 591 if (spdk_mem_unregister((void *)start, len) != 0) { 592 assert(false); 593 } 594 } 595 596 } 597 598 void 599 spdk_vhost_free_reactor(uint32_t lcore) 600 { 601 g_num_ctrlrs[lcore]--; 602 } 603 604 struct spdk_vhost_dev * 605 spdk_vhost_dev_next(struct spdk_vhost_dev *vdev) 606 { 607 if (vdev == NULL) { 608 return TAILQ_FIRST(&g_spdk_vhost_devices); 609 } 610 611 return TAILQ_NEXT(vdev, tailq); 612 } 613 614 struct spdk_vhost_dev * 615 spdk_vhost_dev_find(const char *ctrlr_name) 616 { 617 struct spdk_vhost_dev *vdev; 618 size_t dev_dirname_len = strlen(dev_dirname); 619 620 if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { 621 ctrlr_name += dev_dirname_len; 622 } 623 624 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 625 if (strcmp(vdev->name, ctrlr_name) == 0) { 626 return vdev; 627 } 628 } 629 630 return NULL; 631 } 632 633 static int 634 spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) 635 { 636 int rc; 637 638 if (cpumask == NULL) { 639 return -1; 640 } 641 642 if (mask == NULL) { 643 spdk_cpuset_copy(cpumask, spdk_app_get_core_mask()); 644 return 0; 645 } 646 647 rc = spdk_app_parse_core_mask(mask, cpumask); 648 if (rc < 0) { 649 SPDK_ERRLOG("invalid cpumask %s\n", mask); 650 return -1; 651 } 652 653 if (spdk_cpuset_count(cpumask) == 0) { 654 SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n", 
655 spdk_cpuset_fmt(spdk_app_get_core_mask())); 656 return -1; 657 } 658 659 return 0; 660 } 661 662 static void * 663 _start_rte_driver(void *arg) 664 { 665 char *path = arg; 666 667 if (rte_vhost_driver_start(path) != 0) { 668 return NULL; 669 } 670 671 return path; 672 } 673 674 int 675 spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, 676 const struct spdk_vhost_dev_backend *backend) 677 { 678 char path[PATH_MAX]; 679 struct stat file_stat; 680 struct spdk_cpuset *cpumask; 681 int rc; 682 683 assert(vdev); 684 if (name == NULL) { 685 SPDK_ERRLOG("Can't register controller with no name\n"); 686 return -EINVAL; 687 } 688 689 cpumask = spdk_cpuset_alloc(); 690 if (!cpumask) { 691 SPDK_ERRLOG("spdk_cpuset_alloc failed\n"); 692 return -ENOMEM; 693 } 694 695 if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) { 696 SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n", 697 mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask())); 698 rc = -EINVAL; 699 goto out; 700 } 701 702 if (spdk_vhost_dev_find(name)) { 703 SPDK_ERRLOG("vhost controller %s already exists.\n", name); 704 rc = -EEXIST; 705 goto out; 706 } 707 708 if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) { 709 SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname, 710 name); 711 rc = -EINVAL; 712 goto out; 713 } 714 715 /* Register vhost driver to handle vhost messages. 
*/ 716 if (stat(path, &file_stat) != -1) { 717 if (!S_ISSOCK(file_stat.st_mode)) { 718 SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " 719 "The file already exists and is not a socket.\n", 720 path); 721 rc = -EIO; 722 goto out; 723 } else if (unlink(path) != 0) { 724 SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " 725 "The socket already exists and failed to unlink.\n", 726 path); 727 rc = -EIO; 728 goto out; 729 } 730 } 731 732 if (rte_vhost_driver_register(path, 0) != 0) { 733 SPDK_ERRLOG("Could not register controller %s with vhost library\n", name); 734 SPDK_ERRLOG("Check if domain socket %s already exists\n", path); 735 rc = -EIO; 736 goto out; 737 } 738 if (rte_vhost_driver_set_features(path, backend->virtio_features) || 739 rte_vhost_driver_disable_features(path, backend->disabled_features)) { 740 SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name); 741 742 rte_vhost_driver_unregister(path); 743 rc = -EIO; 744 goto out; 745 } 746 747 if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) { 748 rte_vhost_driver_unregister(path); 749 SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name); 750 rc = -EIO; 751 goto out; 752 } 753 754 vdev->name = strdup(name); 755 vdev->path = strdup(path); 756 if (vdev->name == NULL || vdev->path == NULL) { 757 free(vdev->name); 758 free(vdev->path); 759 rte_vhost_driver_unregister(path); 760 rc = -EIO; 761 goto out; 762 } 763 764 vdev->cpumask = cpumask; 765 vdev->registered = true; 766 vdev->backend = backend; 767 TAILQ_INIT(&vdev->vsessions); 768 TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq); 769 770 spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US, 771 SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD); 772 773 spdk_vhost_dev_install_rte_compat_hooks(vdev); 774 775 /* The following might start a POSIX thread that polls for incoming 776 * socket connections and calls backend->start/stop_device. 
These backend 777 * callbacks are also protected by the global SPDK vhost mutex, so we're 778 * safe with not initializing the vdev just yet. 779 */ 780 if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) { 781 SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n", 782 name, errno, spdk_strerror(errno)); 783 rte_vhost_driver_unregister(path); 784 TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); 785 free(vdev->name); 786 free(vdev->path); 787 rc = -EIO; 788 goto out; 789 } 790 791 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name); 792 return 0; 793 794 out: 795 spdk_cpuset_free(cpumask); 796 return rc; 797 } 798 799 int 800 spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev) 801 { 802 if (!TAILQ_EMPTY(&vdev->vsessions)) { 803 SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name); 804 return -EBUSY; 805 } 806 807 if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) { 808 SPDK_ERRLOG("Could not unregister controller %s with vhost library\n" 809 "Check if domain socket %s still exists\n", 810 vdev->name, vdev->path); 811 return -EIO; 812 } 813 814 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name); 815 816 free(vdev->name); 817 free(vdev->path); 818 spdk_cpuset_free(vdev->cpumask); 819 TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); 820 return 0; 821 } 822 823 static struct spdk_vhost_session * 824 spdk_vhost_session_next(struct spdk_vhost_dev *vdev, unsigned prev_id) 825 { 826 struct spdk_vhost_session *vsession; 827 828 TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { 829 if (vsession->id > prev_id) { 830 return vsession; 831 } 832 } 833 834 return NULL; 835 } 836 837 const char * 838 spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev) 839 { 840 assert(vdev != NULL); 841 return vdev->name; 842 } 843 844 const struct spdk_cpuset * 845 spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev) 846 { 847 assert(vdev != NULL); 848 return vdev->cpumask; 
}

/* Pick the least-loaded core from cpumask (fewest controllers according
 * to g_num_ctrlrs), charge one controller to it and return its index. */
uint32_t
spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask)
{
	uint32_t i, selected_core;
	uint32_t min_ctrlrs;

	min_ctrlrs = INT_MAX;
	/* Fallback in case no core in cpumask is found below. */
	selected_core = spdk_env_get_first_core();

	SPDK_ENV_FOREACH_CORE(i) {
		if (!spdk_cpuset_get_cpu(cpumask, i)) {
			continue;
		}

		if (g_num_ctrlrs[i] < min_ctrlrs) {
			selected_core = i;
			min_ctrlrs = g_num_ctrlrs[i];
		}
	}

	g_num_ctrlrs[selected_core]++;
	return selected_core;
}

/* Record the event's result and wake the thread blocked in
 * spdk_vhost_session_send_event(). */
static void
complete_session_event(struct spdk_vhost_session *vsession, int response)
{
	struct spdk_vhost_session_fn_ctx *ctx = vsession->event_ctx;

	ctx->response = response;
	sem_post(&ctx->sem);
}

/* Called by a backend when its start_session event finished. On success
 * the session is bound to the current core and counted as active. */
void
spdk_vhost_session_start_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->lcore = spdk_env_get_current_core();
		assert(vsession->vdev->active_session_num < UINT32_MAX);
		vsession->vdev->active_session_num++;
	}
	complete_session_event(vsession, response);
}

/* Called by a backend when its stop_session event finished. On success
 * the session is detached from its core (lcore = -1). */
void
spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->lcore = -1;
		assert(vsession->vdev->active_session_num > 0);
		vsession->vdev->active_session_num--;
	}
	complete_session_event(vsession, response);
}

/* Event trampoline run on the target lcore: invoke the user callback
 * under g_spdk_vhost_mutex. If the mutex is contended, re-post this
 * event to ourselves instead of blocking the reactor. */
static void
spdk_vhost_event_cb(void *arg1, void *arg2)
{
	struct spdk_vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession;
	struct spdk_event *ev;

	if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
		ev = spdk_event_allocate(spdk_env_get_current_core(),
					 spdk_vhost_event_cb, arg1, arg2);
		spdk_event_call(ev);
		return;
	}

	/* Re-resolve the session by id - it may have been removed while
	 * this event was in flight. */
	vsession = spdk_vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
	ctx->cb_fn(ctx->vdev, vsession, NULL);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

static void
spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
		struct spdk_vhost_session *vsession,
		spdk_vhost_session_fn fn, void *arg);

/* One step of an async foreach-session chain: run the callback for the
 * session identified by ctx->vsession_id on that session's own core,
 * then continue with the next session. Owns and finally frees ctx. */
static void
spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
{
	struct spdk_vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession = NULL;
	struct spdk_vhost_dev *vdev = ctx->vdev;
	struct spdk_event *ev;
	int rc;

	/* Avoid blocking the reactor - retry via a fresh event if the
	 * global mutex is currently held elsewhere. */
	if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
		ev = spdk_event_allocate(spdk_env_get_current_core(),
					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
		spdk_event_call(ev);
		return;
	}

	vsession = spdk_vhost_session_find_by_id(vdev, ctx->vsession_id);
	if (vsession == NULL || !vsession->initialized) {
		/* The session must have been removed in the meantime, so we
		 * just skip it in our foreach chain
		 */
		goto out_unlock_continue;
	}

	if (vsession->lcore >= 0 &&
	    (uint32_t)vsession->lcore != spdk_env_get_current_core()) {
		/* if session has been relocated to other core, it is no longer thread-safe
		 * to access its contents here. Even though we're running under the global
		 * vhost mutex, the session itself (and its pollers) are not. We need to chase
		 * the session thread as many times as necessary. */
		ev = spdk_event_allocate(vsession->lcore,
					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
		spdk_event_call(ev);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	rc = ctx->cb_fn(vdev, vsession, arg2);
	if (rc < 0) {
		/* A negative return aborts the rest of the foreach chain. */
		goto out_unlock;
	}

out_unlock_continue:
	vsession = spdk_vhost_session_next(vdev, ctx->vsession_id);
	spdk_vhost_external_event_foreach_continue(vdev, vsession, ctx->cb_fn, arg2);
out_unlock:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	free(ctx);
}

/* Run cb_fn for vsession on the given lcore and block (up to timeout_sec)
 * until the backend signals completion via complete_session_event().
 * The global vhost mutex is released while waiting and reacquired before
 * returning; returns the response written by the callback.
 * Caller must hold g_spdk_vhost_mutex. */
int
spdk_vhost_session_send_event(int32_t lcore, struct spdk_vhost_session *vsession,
			      spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
			      const char *errmsg)
{
	struct spdk_vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_event *ev;
	struct timespec timeout;
	int rc;

	rc = sem_init(&ev_ctx.sem, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n");
		return -errno;
	}

	ev_ctx.vdev = vsession->vdev;
	ev_ctx.vsession_id = vsession->id;
	ev_ctx.cb_fn = cb_fn;

	vsession->event_ctx = &ev_ctx;
	ev = spdk_event_allocate(lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
	assert(ev);
	spdk_event_call(ev);
	/* Drop the lock so the event (which takes it itself) can run. */
	pthread_mutex_unlock(&g_spdk_vhost_mutex);

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;

	rc = sem_timedwait(&ev_ctx.sem, &timeout);
	if (rc != 0) {
		SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
		/* ev_ctx lives on this stack - we must wait for the event to
		 * post before returning, however late it is. */
		sem_wait(&ev_ctx.sem);
	}

	sem_destroy(&ev_ctx.sem);
	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession->event_ctx = NULL;
	return ev_ctx.response;
}

/* Queue the next step of an async foreach-session chain onto the
 * session's own core. Returns 0 or -ENOMEM. */
static int
spdk_vhost_event_async_send_foreach_continue(struct spdk_vhost_session *vsession,
		spdk_vhost_session_fn cb_fn, void *arg)
{
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_session_fn_ctx *ev_ctx;
	struct
spdk_event *ev;

	ev_ctx = calloc(1, sizeof(*ev_ctx));
	if (ev_ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc vhost event.\n");
		assert(false);
		return -ENOMEM;
	}

	ev_ctx->vdev = vdev;
	ev_ctx->vsession_id = vsession->id;
	ev_ctx->cb_fn = cb_fn;

	/* ev_ctx is freed by spdk_vhost_event_async_foreach_fn() once the
	 * chain step completes. */
	ev = spdk_event_allocate(vsession->lcore,
				 spdk_vhost_event_async_foreach_fn, ev_ctx, arg);
	assert(ev);
	spdk_event_call(ev);

	return 0;
}

/* Stop a started session: ask the backend to quiesce it, push the final
 * ring indices back to rte_vhost (for reconnect/migration), and drop the
 * guest memory registrations. Caller must hold g_spdk_vhost_mutex. */
static void
_stop_session(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_virtqueue *q;
	int rc;
	uint16_t i;

	rc = vdev->backend->stop_session(vsession);
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
		/* NOTE(review): stop_device() below unlocks g_spdk_vhost_mutex
		 * unconditionally after calling this function, so this unlock
		 * makes the error path unlock the mutex twice - undefined
		 * behavior for a non-recursive mutex. Confirm against every
		 * caller of _stop_session() before changing either side. */
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	for (i = 0; i < vsession->max_queues; i++) {
		q = &vsession->virtqueue[i];
		if (q->vring.desc == NULL) {
			continue;
		}
		/* Preserve ring positions so a reconnecting guest resumes
		 * where it left off. */
		rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
	}

	spdk_vhost_session_mem_unregister(vsession);
	free(vsession->mem);
}

/* rte_vhost destroy_device callback - stop the session identified by vid
 * if it is currently running. */
static void
stop_device(int vid)
{
	struct spdk_vhost_session *vsession;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vsession = spdk_vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	if (vsession->lcore == -1) {
		/* already stopped, nothing to do */
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	_stop_session(vsession);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

/* rte_vhost new_device callback - gather the negotiated rings, features
 * and guest memory table for vid and hand the session to its backend. */
static int
start_device(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	int rc = -1;
	uint16_t i;

pthread_mutex_lock(&g_spdk_vhost_mutex); 1107 1108 vsession = spdk_vhost_session_find_by_vid(vid); 1109 if (vsession == NULL) { 1110 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1111 goto out; 1112 } 1113 1114 vdev = vsession->vdev; 1115 if (vsession->lcore != -1) { 1116 /* already started, nothing to do */ 1117 rc = 0; 1118 goto out; 1119 } 1120 1121 vsession->max_queues = 0; 1122 memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue)); 1123 for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) { 1124 struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i]; 1125 1126 if (rte_vhost_get_vhost_vring(vid, i, &q->vring)) { 1127 continue; 1128 } 1129 1130 if (q->vring.desc == NULL || q->vring.size == 0) { 1131 continue; 1132 } 1133 1134 if (rte_vhost_get_vring_base(vsession->vid, i, &q->last_avail_idx, &q->last_used_idx)) { 1135 q->vring.desc = NULL; 1136 continue; 1137 } 1138 1139 /* Disable notifications. */ 1140 if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) { 1141 SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i); 1142 goto out; 1143 } 1144 1145 vsession->max_queues = i + 1; 1146 } 1147 1148 if (rte_vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) { 1149 SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid); 1150 goto out; 1151 } 1152 1153 if (rte_vhost_get_mem_table(vid, &vsession->mem) != 0) { 1154 SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid); 1155 goto out; 1156 } 1157 1158 for (i = 0; i < vsession->mem->nregions; i++) { 1159 uint64_t mmap_size = vsession->mem->regions[i].mmap_size; 1160 1161 if (mmap_size & MASK_2MB) { 1162 SPDK_ERRLOG("vhost device %d: Guest mmaped memory size %" PRIx64 1163 " is not a 2MB multiple\n", vid, mmap_size); 1164 free(vsession->mem); 1165 goto out; 1166 } 1167 } 1168 1169 /* 1170 * Not sure right now but this look like some kind of QEMU bug and guest IO 1171 * might be frozed without kicking 
all queues after live-migration. This look like 1172 * the previous vhost instance failed to effectively deliver all interrupts before 1173 * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts 1174 * should be ignored by guest virtio driver. 1175 * 1176 * Tested on QEMU 2.10.91 and 2.11.50. 1177 */ 1178 for (i = 0; i < vsession->max_queues; i++) { 1179 if (vsession->virtqueue[i].vring.callfd != -1) { 1180 eventfd_write(vsession->virtqueue[i].vring.callfd, (eventfd_t)1); 1181 } 1182 } 1183 1184 spdk_vhost_session_set_coalescing(vdev, vsession, NULL); 1185 spdk_vhost_session_mem_register(vsession); 1186 vsession->initialized = true; 1187 rc = vdev->backend->start_session(vsession); 1188 if (rc != 0) { 1189 spdk_vhost_session_mem_unregister(vsession); 1190 free(vsession->mem); 1191 goto out; 1192 } 1193 1194 out: 1195 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1196 return rc; 1197 } 1198 1199 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 1200 static int 1201 get_config(int vid, uint8_t *config, uint32_t len) 1202 { 1203 struct spdk_vhost_session *vsession; 1204 struct spdk_vhost_dev *vdev; 1205 int rc = -1; 1206 1207 pthread_mutex_lock(&g_spdk_vhost_mutex); 1208 vsession = spdk_vhost_session_find_by_vid(vid); 1209 if (vsession == NULL) { 1210 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1211 goto out; 1212 } 1213 1214 vdev = vsession->vdev; 1215 if (vdev->backend->vhost_get_config) { 1216 rc = vdev->backend->vhost_get_config(vdev, config, len); 1217 } 1218 1219 out: 1220 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1221 return rc; 1222 } 1223 1224 static int 1225 set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags) 1226 { 1227 struct spdk_vhost_session *vsession; 1228 struct spdk_vhost_dev *vdev; 1229 int rc = -1; 1230 1231 pthread_mutex_lock(&g_spdk_vhost_mutex); 1232 vsession = spdk_vhost_session_find_by_vid(vid); 1233 if (vsession == NULL) { 1234 SPDK_ERRLOG("Couldn't find session with vid %d.\n", 
vid); 1235 goto out; 1236 } 1237 1238 vdev = vsession->vdev; 1239 if (vdev->backend->vhost_set_config) { 1240 rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags); 1241 } 1242 1243 out: 1244 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1245 return rc; 1246 } 1247 #endif 1248 1249 int 1250 spdk_vhost_set_socket_path(const char *basename) 1251 { 1252 int ret; 1253 1254 if (basename && strlen(basename) > 0) { 1255 ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename); 1256 if (ret <= 0) { 1257 return -EINVAL; 1258 } 1259 if ((size_t)ret >= sizeof(dev_dirname) - 2) { 1260 SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret); 1261 return -EINVAL; 1262 } 1263 1264 if (dev_dirname[ret - 1] != '/') { 1265 dev_dirname[ret] = '/'; 1266 dev_dirname[ret + 1] = '\0'; 1267 } 1268 } 1269 1270 return 0; 1271 } 1272 1273 static void * 1274 session_shutdown(void *arg) 1275 { 1276 struct spdk_vhost_dev *vdev = NULL; 1277 1278 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 1279 rte_vhost_driver_unregister(vdev->path); 1280 vdev->registered = false; 1281 } 1282 1283 SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n"); 1284 spdk_event_call((struct spdk_event *)arg); 1285 return NULL; 1286 } 1287 1288 void 1289 spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1290 { 1291 assert(vdev->backend->dump_info_json != NULL); 1292 vdev->backend->dump_info_json(vdev, w); 1293 } 1294 1295 int 1296 spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev) 1297 { 1298 if (vdev->pending_async_op_num) { 1299 return -EBUSY; 1300 } 1301 1302 return vdev->backend->remove_device(vdev); 1303 } 1304 1305 static int 1306 new_connection(int vid) 1307 { 1308 struct spdk_vhost_dev *vdev; 1309 struct spdk_vhost_session *vsession; 1310 char ifname[PATH_MAX]; 1311 1312 pthread_mutex_lock(&g_spdk_vhost_mutex); 1313 1314 if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) { 1315 SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid); 
1316 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1317 return -1; 1318 } 1319 1320 vdev = spdk_vhost_dev_find(ifname); 1321 if (vdev == NULL) { 1322 SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid); 1323 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1324 return -1; 1325 } 1326 1327 /* We expect sessions inside vdev->vsessions to be sorted in ascending 1328 * order in regard of vsession->id. For now we always set id = vsessions_cnt++ 1329 * and append each session to the very end of the vsessions list. 1330 * This is required for spdk_vhost_dev_foreach_session() to work. 1331 */ 1332 if (vdev->vsessions_num == UINT_MAX) { 1333 assert(false); 1334 return -EINVAL; 1335 } 1336 1337 if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) + 1338 vdev->backend->session_ctx_size)) { 1339 SPDK_ERRLOG("vsession alloc failed\n"); 1340 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1341 return -1; 1342 } 1343 memset(vsession, 0, sizeof(*vsession) + vdev->backend->session_ctx_size); 1344 1345 vsession->vdev = vdev; 1346 vsession->id = vdev->vsessions_num++; 1347 vsession->vid = vid; 1348 vsession->lcore = -1; 1349 vsession->initialized = false; 1350 vsession->next_stats_check_time = 0; 1351 vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS * 1352 spdk_get_ticks_hz() / 1000UL; 1353 TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq); 1354 1355 spdk_vhost_session_install_rte_compat_hooks(vsession); 1356 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1357 return 0; 1358 } 1359 1360 static void 1361 destroy_connection(int vid) 1362 { 1363 struct spdk_vhost_session *vsession; 1364 1365 pthread_mutex_lock(&g_spdk_vhost_mutex); 1366 vsession = spdk_vhost_session_find_by_vid(vid); 1367 if (vsession == NULL) { 1368 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); 1369 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1370 return; 1371 } 1372 1373 if (vsession->lcore != -1) { 1374 _stop_session(vsession); 1375 } 1376 1377 
TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq); 1378 free(vsession); 1379 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1380 } 1381 1382 static void 1383 spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev, 1384 struct spdk_vhost_session *vsession, 1385 spdk_vhost_session_fn fn, void *arg) 1386 { 1387 int rc; 1388 1389 if (vsession == NULL) { 1390 goto out_finish_foreach; 1391 } 1392 1393 while (vsession->lcore == -1) { 1394 if (vsession->initialized) { 1395 rc = fn(vdev, vsession, arg); 1396 if (rc < 0) { 1397 return; 1398 } 1399 } 1400 1401 vsession = spdk_vhost_session_next(vdev, vsession->id); 1402 if (vsession == NULL) { 1403 goto out_finish_foreach; 1404 } 1405 } 1406 1407 spdk_vhost_event_async_send_foreach_continue(vsession, fn, arg); 1408 return; 1409 1410 out_finish_foreach: 1411 /* there are no more sessions to iterate through, so call the 1412 * fn one last time with vsession == NULL 1413 */ 1414 assert(vdev->pending_async_op_num > 0); 1415 vdev->pending_async_op_num--; 1416 fn(vdev, NULL, arg); 1417 } 1418 1419 void 1420 spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *vdev, 1421 spdk_vhost_session_fn fn, void *arg) 1422 { 1423 struct spdk_vhost_session *vsession = TAILQ_FIRST(&vdev->vsessions); 1424 1425 assert(vdev->pending_async_op_num < UINT32_MAX); 1426 vdev->pending_async_op_num++; 1427 spdk_vhost_external_event_foreach_continue(vdev, vsession, fn, arg); 1428 } 1429 1430 void 1431 spdk_vhost_lock(void) 1432 { 1433 pthread_mutex_lock(&g_spdk_vhost_mutex); 1434 } 1435 1436 int 1437 spdk_vhost_trylock(void) 1438 { 1439 return -pthread_mutex_trylock(&g_spdk_vhost_mutex); 1440 } 1441 1442 void 1443 spdk_vhost_unlock(void) 1444 { 1445 pthread_mutex_unlock(&g_spdk_vhost_mutex); 1446 } 1447 1448 int 1449 spdk_vhost_init(void) 1450 { 1451 uint32_t last_core; 1452 size_t len; 1453 int ret; 1454 1455 if (dev_dirname[0] == '\0') { 1456 if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) { 1457 SPDK_ERRLOG("getcwd failed 
(%d): %s\n", errno, spdk_strerror(errno)); 1458 return -1; 1459 } 1460 1461 len = strlen(dev_dirname); 1462 if (dev_dirname[len - 1] != '/') { 1463 dev_dirname[len] = '/'; 1464 dev_dirname[len + 1] = '\0'; 1465 } 1466 } 1467 1468 last_core = spdk_env_get_last_core(); 1469 g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t)); 1470 if (!g_num_ctrlrs) { 1471 SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n", 1472 last_core + 1); 1473 return -1; 1474 } 1475 1476 ret = spdk_vhost_scsi_controller_construct(); 1477 if (ret != 0) { 1478 SPDK_ERRLOG("Cannot construct vhost controllers\n"); 1479 return -1; 1480 } 1481 1482 ret = spdk_vhost_blk_controller_construct(); 1483 if (ret != 0) { 1484 SPDK_ERRLOG("Cannot construct vhost block controllers\n"); 1485 return -1; 1486 } 1487 1488 #ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB 1489 ret = spdk_vhost_nvme_controller_construct(); 1490 if (ret != 0) { 1491 SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n"); 1492 return -1; 1493 } 1494 #endif 1495 1496 return 0; 1497 } 1498 1499 static void 1500 _spdk_vhost_fini(void *arg1, void *arg2) 1501 { 1502 spdk_vhost_fini_cb fini_cb = arg1; 1503 struct spdk_vhost_dev *vdev, *tmp; 1504 1505 spdk_vhost_lock(); 1506 vdev = spdk_vhost_dev_next(NULL); 1507 while (vdev != NULL) { 1508 tmp = spdk_vhost_dev_next(vdev); 1509 spdk_vhost_dev_remove(vdev); 1510 /* don't care if it fails, there's nothing we can do for now */ 1511 vdev = tmp; 1512 } 1513 spdk_vhost_unlock(); 1514 1515 /* All devices are removed now. */ 1516 free(g_num_ctrlrs); 1517 fini_cb(); 1518 } 1519 1520 void 1521 spdk_vhost_fini(spdk_vhost_fini_cb fini_cb) 1522 { 1523 pthread_t tid; 1524 int rc; 1525 struct spdk_event *fini_ev; 1526 1527 fini_ev = spdk_event_allocate(spdk_env_get_current_core(), _spdk_vhost_fini, fini_cb, NULL); 1528 1529 /* rte_vhost API for removing sockets is not asynchronous. 
Since it may call SPDK 1530 * ops for stopping a device or removing a connection, we need to call it from 1531 * a separate thread to avoid deadlock. 1532 */ 1533 rc = pthread_create(&tid, NULL, &session_shutdown, fini_ev); 1534 if (rc < 0) { 1535 SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc)); 1536 abort(); 1537 } 1538 pthread_detach(tid); 1539 } 1540 1541 void 1542 spdk_vhost_config_json(struct spdk_json_write_ctx *w) 1543 { 1544 struct spdk_vhost_dev *vdev; 1545 uint32_t delay_base_us; 1546 uint32_t iops_threshold; 1547 1548 spdk_json_write_array_begin(w); 1549 1550 spdk_vhost_lock(); 1551 vdev = spdk_vhost_dev_next(NULL); 1552 while (vdev != NULL) { 1553 vdev->backend->write_config_json(vdev, w); 1554 1555 spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold); 1556 if (delay_base_us) { 1557 spdk_json_write_object_begin(w); 1558 spdk_json_write_named_string(w, "method", "set_vhost_controller_coalescing"); 1559 1560 spdk_json_write_named_object_begin(w, "params"); 1561 spdk_json_write_named_string(w, "ctrlr", vdev->name); 1562 spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us); 1563 spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold); 1564 spdk_json_write_object_end(w); 1565 1566 spdk_json_write_object_end(w); 1567 } 1568 vdev = spdk_vhost_dev_next(vdev); 1569 } 1570 spdk_vhost_unlock(); 1571 1572 spdk_json_write_array_end(w); 1573 } 1574 1575 SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST) 1576 SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING) 1577