/*-
 *   BSD LICENSE
 *
 *   Copyright(c) Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/barrier.h"

#include "spdk/vhost.h"
#include "vhost_internal.h"

static uint32_t *g_num_ctrlrs;

/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";

struct spdk_vhost_dev_event_ctx {
	/** Pointer to the controller obtained before enqueuing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the vdev to send event to. */
	unsigned vdev_id;

	/** User callback function to be executed on given lcore. */
	spdk_vhost_event_fn cb_fn;

	/** Semaphore used to signal that event is done. */
	sem_t sem;

	/** Response to be written by enqueued event. */
	int response;
};

static int new_connection(int vid);
static int start_device(int vid);
static void stop_device(int vid);
static void destroy_connection(int vid);
static int get_config(int vid, uint8_t *config, uint32_t len);
static int set_config(int vid, uint8_t *config, uint32_t offset,
		      uint32_t size, uint32_t flags);

const struct vhost_device_ops g_spdk_vhost_ops = {
	.new_device =  start_device,
	.destroy_device = stop_device,
	.get_config = get_config,
	.set_config = set_config,
	.new_connection = new_connection,
	.destroy_connection = destroy_connection,
	.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
	.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
	.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
};

static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
			g_spdk_vhost_devices);
static pthread_mutex_t g_spdk_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;

void *spdk_vhost_gpa_to_vva(struct spdk_vhost_dev *vdev, uint64_t addr, uint64_t len)
{
	void *vva;
	uint64_t newlen;

	newlen = len;
	vva = (void *)rte_vhost_va_from_guest_pa(vdev->mem, addr, &newlen);
	if (newlen != len) {
		return NULL;
	}

	return vva;
}

static void
spdk_vhost_log_req_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
			uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = spdk_vhost_vq_get_desc(vdev, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (spdk_vhost_vring_desc_is_wr(desc)) {
			/* To be honest, only pages really touched should be logged, but
			 * doing so would require tracking those changes in each backend.
			 * Also, the backend will most likely touch all/most of those pages,
			 * so let's assume we touched all pages passed to us as writable buffers. */
			rte_vhost_log_write(vdev->vid, desc->addr, desc->len);
		}
		spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}

static void
spdk_vhost_log_used_vring_elem(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
			       uint16_t idx)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, ring[idx]);
	len = sizeof(virtqueue->vring.used->ring[idx]);
	vq_idx = virtqueue - vdev->virtqueue;

	rte_vhost_log_used_vring(vdev->vid, vq_idx, offset, len);
}

static void
spdk_vhost_log_used_vring_idx(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, idx);
	len = sizeof(virtqueue->vring.used->idx);
	vq_idx = virtqueue - vdev->virtqueue;

	rte_vhost_log_used_vring(vdev->vid, vq_idx, offset, len);
}

/*
 * Get available requests from avail ring.
 */
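/*
 * Note (illustrative, not from the original source): the avail/used indices
 * used below are free-running 16-bit counters, so "avail->idx - last_avail_idx"
 * relies on unsigned wrap-around. For example, last_avail_idx = 65534 and
 * avail->idx = 2 yields count = 4. Masking entry positions with (size - 1)
 * assumes vring->size is a power of two, which is what this code expects.
 */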
uint16_t
spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
			     uint16_t reqs_len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_avail *avail = vring->avail;
	uint16_t size_mask = vring->size - 1;
	uint16_t last_idx = vring->last_avail_idx, avail_idx = avail->idx;
	uint16_t count, i;

	count = avail_idx - last_idx;
	if (spdk_likely(count == 0)) {
		return 0;
	}

	if (spdk_unlikely(count > vring->size)) {
		/* TODO: the queue is unrecoverably broken and should be marked so.
		 * For now we will fail silently and report there are no new avail entries.
		 */
		return 0;
	}

	count = spdk_min(count, reqs_len);
	vring->last_avail_idx += count;
	for (i = 0; i < count; i++) {
		reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      last_idx, avail_idx, count);

	return count;
}

static bool
spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}

int
spdk_vhost_vq_get_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
		       uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
		       uint32_t *desc_table_size)
{
	if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
		return -1;
	}

	*desc = &virtqueue->vring.desc[req_idx];

	if (spdk_vhost_vring_desc_is_indirect(*desc)) {
		assert(spdk_vhost_dev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC));
		*desc_table_size = (*desc)->len / sizeof(**desc);
		*desc_table = spdk_vhost_gpa_to_vva(vdev, (*desc)->addr,
						    sizeof(**desc) * *desc_table_size);
		*desc = *desc_table;
		if (*desc == NULL) {
			return -1;
		}

		return 0;
	}

	*desc_table = virtqueue->vring.desc;
	*desc_table_size = virtqueue->vring.size;

	return 0;
}

int
spdk_vhost_vq_used_signal(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue)
{
	if (virtqueue->used_req_cnt == 0) {
		return 0;
	}

	virtqueue->req_cnt += virtqueue->used_req_cnt;
	virtqueue->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      virtqueue - vdev->virtqueue, virtqueue->vring.last_used_idx);

	eventfd_write(virtqueue->vring.callfd, (eventfd_t)1);
	return 1;
}

static void
check_dev_io_stats(struct spdk_vhost_dev *vdev, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vdev->coalescing_delay_time_base;
	uint32_t io_threshold = vdev->coalescing_io_rate_threshold;
	uint32_t irq_delay, req_cnt;
	uint16_t q_idx;

	if (now < vdev->next_stats_check_time) {
		return;
	}

	vdev->next_stats_check_time = now + vdev->stats_check_interval;
	for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) {
		virtqueue = &vdev->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}

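/*
 * Note on check_dev_io_stats() above (illustrative numbers, not taken from
 * this code): the interrupt delay grows linearly with how far the per-interval
 * request count exceeds the threshold. Assuming irq_delay_base corresponds to
 * 50 us worth of ticks and io_threshold is 60 requests per stats interval, a
 * queue that saw 120 requests gets irq_delay = base * (120 - 60) / 60, i.e.
 * roughly 50 us, while 180 requests would yield roughly 100 us. The actual
 * values depend on the user-provided delay_base_us/iops_threshold and the
 * tick rate reported by spdk_get_ticks_hz().
 */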
void
spdk_vhost_dev_used_signal(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vdev->coalescing_delay_time_base == 0) {
		for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) {
			virtqueue = &vdev->virtqueue[q_idx];

			if (virtqueue->vring.desc == NULL ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			spdk_vhost_vq_used_signal(vdev, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		check_dev_io_stats(vdev, now);

		for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) {
			virtqueue = &vdev->virtqueue[q_idx];

			/* No need for event right now */
			if (now < virtqueue->next_event_time ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}

			if (!spdk_vhost_vq_used_signal(vdev, virtqueue)) {
				continue;
			}

			/* The syscall is quite long, so refresh the time. */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}

int
spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			  uint32_t iops_threshold)
{
	uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
	uint32_t io_rate = iops_threshold * SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS / 1000U;

	if (delay_time_base >= UINT32_MAX) {
		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
		return -EINVAL;
	} else if (io_rate == 0) {
		SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
			    1000U / SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS);
		return -EINVAL;
	}

	vdev->coalescing_delay_time_base = delay_time_base;
	vdev->coalescing_io_rate_threshold = io_rate;

	vdev->coalescing_delay_us = delay_base_us;
	vdev->coalescing_iops_threshold = iops_threshold;
	return 0;
}

void
spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
			  uint32_t *iops_threshold)
{
	if (delay_base_us) {
		*delay_base_us = vdev->coalescing_delay_us;
	}

	if (iops_threshold) {
		*iops_threshold = vdev->coalescing_iops_threshold;
	}
}

/*
 * Enqueue id and len to used ring.
 */
void
spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
				uint16_t id, uint32_t len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_used *used = vring->used;
	uint16_t last_idx = vring->last_used_idx & (vring->size - 1);

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vdev->virtqueue, vring->last_used_idx, id, len);

	spdk_vhost_log_req_desc(vdev, virtqueue, id);

	vring->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;

	/* Ensure the used ring is updated before we log it or increment used->idx. */
	spdk_smp_wmb();

	spdk_vhost_log_used_vring_elem(vdev, virtqueue, last_idx);
	* (volatile uint16_t *) &used->idx = vring->last_used_idx;
	spdk_vhost_log_used_vring_idx(vdev, virtqueue);

	/* Ensure all our used ring changes are visible to the guest at the time
	 * of interrupt.
	 * TODO: this is currently an sfence on x86. For other architectures we
	 * will most likely need an smp_mb(), but smp_mb() is overkill for x86.
	 */
	spdk_wmb();

	virtqueue->used_req_cnt++;
}

int
spdk_vhost_vring_desc_get_next(struct vring_desc **desc,
			       struct vring_desc *desc_table, uint32_t desc_table_size)
{
	struct vring_desc *old_desc = *desc;
	uint16_t next_idx;

	if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
		*desc = NULL;
		return 0;
	}

	next_idx = old_desc->next;
	if (spdk_unlikely(next_idx >= desc_table_size)) {
		*desc = NULL;
		return -1;
	}

	*desc = &desc_table[next_idx];
	return 0;
}

bool
spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

#define _2MB_OFFSET(ptr)	((ptr) & (0x200000 - 1))
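
/*
 * Note on spdk_vhost_vring_desc_to_iov() below (illustrative example, not a
 * limit taken from this code): a descriptor is only split at a 2MB boundary
 * when the two sides of that boundary map to different host virtual
 * addresses. For example, a 6MB descriptor whose guest-physical range is
 * backed by one contiguous host mapping ends up as a single iovec; if the
 * mapping changes after the first 2MB, the first iovec covers 2MB and the
 * remainder is handled by the next pass of the outer loop.
 */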
472 */ 473 len = to_boundary; 474 while (len < remaining) { 475 if (vva + len != (uintptr_t)rte_vhost_gpa_to_vva(vdev->mem, payload + len)) { 476 break; 477 } 478 len += spdk_min(remaining - len, 0x200000); 479 } 480 } 481 iov[*iov_index].iov_base = (void *)vva; 482 iov[*iov_index].iov_len = len; 483 remaining -= len; 484 payload += len; 485 (*iov_index)++; 486 } 487 488 return 0; 489 } 490 491 static struct spdk_vhost_dev * 492 spdk_vhost_dev_find_by_id(unsigned id) 493 { 494 struct spdk_vhost_dev *vdev; 495 496 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 497 if (vdev->id == id) { 498 return vdev; 499 } 500 } 501 502 return NULL; 503 } 504 505 static struct spdk_vhost_dev * 506 spdk_vhost_dev_find_by_vid(int vid) 507 { 508 struct spdk_vhost_dev *vdev; 509 510 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 511 if (vdev->vid == vid) { 512 return vdev; 513 } 514 } 515 516 return NULL; 517 } 518 519 #define SHIFT_2MB 21 520 #define SIZE_2MB (1ULL << SHIFT_2MB) 521 #define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB 522 #define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB 523 524 static void 525 spdk_vhost_dev_mem_register(struct spdk_vhost_dev *vdev) 526 { 527 struct rte_vhost_mem_region *region; 528 uint32_t i; 529 530 for (i = 0; i < vdev->mem->nregions; i++) { 531 uint64_t start, end, len; 532 region = &vdev->mem->regions[i]; 533 start = FLOOR_2MB(region->mmap_addr); 534 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 535 len = end - start; 536 SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", 537 start, len); 538 539 if (spdk_mem_register((void *)start, len) != 0) { 540 SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n", 541 i); 542 continue; 543 } 544 } 545 } 546 547 static void 548 spdk_vhost_dev_mem_unregister(struct spdk_vhost_dev *vdev) 549 { 550 struct rte_vhost_mem_region *region; 551 uint32_t i; 552 553 for (i = 0; i < vdev->mem->nregions; i++) { 554 uint64_t start, end, len; 555 region = &vdev->mem->regions[i]; 556 start = FLOOR_2MB(region->mmap_addr); 557 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 558 len = end - start; 559 560 if (spdk_vtophys((void *) start) == SPDK_VTOPHYS_ERROR) { 561 continue; /* region has not been registered */ 562 } 563 564 if (spdk_mem_unregister((void *)start, len) != 0) { 565 assert(false); 566 } 567 } 568 569 } 570 571 static void 572 spdk_vhost_free_reactor(uint32_t lcore) 573 { 574 g_num_ctrlrs[lcore]--; 575 } 576 577 struct spdk_vhost_dev * 578 spdk_vhost_dev_find(const char *ctrlr_name) 579 { 580 struct spdk_vhost_dev *vdev; 581 size_t dev_dirname_len = strlen(dev_dirname); 582 583 if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { 584 ctrlr_name += dev_dirname_len; 585 } 586 587 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 588 if (strcmp(vdev->name, ctrlr_name) == 0) { 589 return vdev; 590 } 591 } 592 593 return NULL; 594 } 595 596 static int 597 spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) 598 { 599 int rc; 600 601 if (cpumask == NULL) { 602 return -1; 603 } 604 605 if (mask == NULL) { 606 spdk_cpuset_copy(cpumask, spdk_app_get_core_mask()); 607 return 0; 608 } 609 610 rc = spdk_app_parse_core_mask(mask, cpumask); 611 if (rc < 0) { 612 SPDK_ERRLOG("invalid cpumask %s\n", mask); 613 return -1; 614 } 615 616 if (spdk_cpuset_count(cpumask) == 0) { 617 SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n", 618 
			    spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -1;
	}

	return 0;
}

static void *
_start_rte_driver(void *arg)
{
	char *path = arg;

	if (rte_vhost_driver_start(path) != 0) {
		return NULL;
	}

	return path;
}

int
spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
			const struct spdk_vhost_dev_backend *backend)
{
	static unsigned ctrlr_num;
	char path[PATH_MAX];
	struct stat file_stat;
	struct spdk_cpuset *cpumask;
	int rc;

	assert(vdev);

	/* We expect devices inside g_spdk_vhost_devices to be sorted in ascending
	 * order with regard to vdev->id. For now we always set vdev->id = ctrlr_num++
	 * and append each vdev to the very end of the g_spdk_vhost_devices list.
	 * This is required for foreach vhost events to work.
	 */
	if (ctrlr_num == UINT_MAX) {
		assert(false);
		return -EINVAL;
	}

	if (name == NULL) {
		SPDK_ERRLOG("Can't register controller with no name\n");
		return -EINVAL;
	}

	cpumask = spdk_cpuset_alloc();
	if (!cpumask) {
		SPDK_ERRLOG("spdk_cpuset_alloc failed\n");
		return -ENOMEM;
	}

	if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) {
		SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n",
			    mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask()));
		rc = -EINVAL;
		goto out;
	}

	if (spdk_vhost_dev_find(name)) {
		SPDK_ERRLOG("vhost controller %s already exists.\n", name);
		rc = -EEXIST;
		goto out;
	}

	if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) {
		SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname,
			    name);
		rc = -EINVAL;
		goto out;
	}

	/* Register vhost driver to handle vhost messages. */
	if (stat(path, &file_stat) != -1) {
		if (!S_ISSOCK(file_stat.st_mode)) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The file already exists and is not a socket.\n",
				    path);
			rc = -EIO;
			goto out;
		} else if (unlink(path) != 0) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The socket already exists and could not be unlinked.\n",
				    path);
			rc = -EIO;
			goto out;
		}
	}

	if (rte_vhost_driver_register(path, 0) != 0) {
		SPDK_ERRLOG("Could not register controller %s with vhost library\n", name);
		SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
		rc = -EIO;
		goto out;
	}
	if (rte_vhost_driver_set_features(path, backend->virtio_features) ||
	    rte_vhost_driver_disable_features(path, backend->disabled_features)) {
		SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name);

		rte_vhost_driver_unregister(path);
		rc = -EIO;
		goto out;
	}

	if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
		rte_vhost_driver_unregister(path);
		SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name);
		rc = -EIO;
		goto out;
	}

	/* The following might start a POSIX thread that polls for incoming
	 * socket connections and calls backend->start/stop_device. These backend
	 * callbacks are also protected by the global SPDK vhost mutex, so it is
	 * safe not to initialize the vdev just yet.
	 */
733 */ 734 if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) { 735 SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n", 736 name, errno, spdk_strerror(errno)); 737 rte_vhost_driver_unregister(path); 738 rc = -EIO; 739 goto out; 740 } 741 742 vdev->name = strdup(name); 743 vdev->path = strdup(path); 744 vdev->id = ctrlr_num++; 745 vdev->vid = -1; 746 vdev->lcore = -1; 747 vdev->cpumask = cpumask; 748 vdev->registered = true; 749 vdev->backend = backend; 750 751 spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US, 752 SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD); 753 vdev->next_stats_check_time = 0; 754 vdev->stats_check_interval = SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS * spdk_get_ticks_hz() / 755 1000UL; 756 757 TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq); 758 759 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name); 760 return 0; 761 762 out: 763 spdk_cpuset_free(cpumask); 764 return rc; 765 } 766 767 int 768 spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev) 769 { 770 if (vdev->vid != -1) { 771 SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name); 772 return -ENODEV; 773 } 774 775 if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) { 776 SPDK_ERRLOG("Could not unregister controller %s with vhost library\n" 777 "Check if domain socket %s still exists\n", 778 vdev->name, vdev->path); 779 return -EIO; 780 } 781 782 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name); 783 784 free(vdev->name); 785 free(vdev->path); 786 spdk_cpuset_free(vdev->cpumask); 787 TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); 788 return 0; 789 } 790 791 static struct spdk_vhost_dev * 792 spdk_vhost_dev_next(unsigned i) 793 { 794 struct spdk_vhost_dev *vdev; 795 796 TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { 797 if (vdev->id > i) { 798 return vdev; 799 } 800 } 801 802 return NULL; 803 } 804 805 const char * 806 spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev) 807 { 808 assert(vdev != NULL); 809 return vdev->name; 810 } 811 812 const struct spdk_cpuset * 813 spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev) 814 { 815 assert(vdev != NULL); 816 return vdev->cpumask; 817 } 818 819 static uint32_t 820 spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask) 821 { 822 uint32_t i, selected_core; 823 uint32_t min_ctrlrs; 824 825 min_ctrlrs = INT_MAX; 826 selected_core = spdk_env_get_first_core(); 827 828 SPDK_ENV_FOREACH_CORE(i) { 829 if (!spdk_cpuset_get_cpu(cpumask, i)) { 830 continue; 831 } 832 833 if (g_num_ctrlrs[i] < min_ctrlrs) { 834 selected_core = i; 835 min_ctrlrs = g_num_ctrlrs[i]; 836 } 837 } 838 839 g_num_ctrlrs[selected_core]++; 840 return selected_core; 841 } 842 843 void 844 spdk_vhost_dev_backend_event_done(void *event_ctx, int response) 845 { 846 struct spdk_vhost_dev_event_ctx *ctx = event_ctx; 847 848 ctx->response = response; 849 sem_post(&ctx->sem); 850 } 851 852 static void 853 spdk_vhost_event_cb(void *arg1, void *arg2) 854 { 855 struct spdk_vhost_dev_event_ctx *ctx = arg1; 856 857 ctx->cb_fn(ctx->vdev, ctx); 858 } 859 860 static void 861 spdk_vhost_event_async_fn(void *arg1, void *arg2) 862 { 863 struct spdk_vhost_dev_event_ctx *ctx = arg1; 864 struct spdk_vhost_dev *vdev; 865 struct spdk_event *ev; 866 867 if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) { 868 ev = spdk_event_allocate(spdk_env_get_current_core(), spdk_vhost_event_async_fn, arg1, arg2); 869 spdk_event_call(ev); 870 return; 871 } 872 873 vdev = 
	if (vdev != ctx->vdev) {
		/* vdev has been changed after enqueuing this event */
		vdev = NULL;
	}

	ctx->cb_fn(vdev, arg2);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);

	free(ctx);
}

static void spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
		spdk_vhost_event_fn fn, void *arg);

static void
spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
{
	struct spdk_vhost_dev_event_ctx *ctx = arg1;
	struct spdk_vhost_dev *vdev;
	struct spdk_event *ev;

	if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
		ev = spdk_event_allocate(spdk_env_get_current_core(),
					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
		spdk_event_call(ev);
		return;
	}

	vdev = spdk_vhost_dev_find_by_id(ctx->vdev_id);
	if (vdev == ctx->vdev) {
		ctx->cb_fn(vdev, arg2);
	} else {
		/* ctx->vdev is probably a dangling pointer at this point.
		 * It must have been removed in the meantime, so we just skip
		 * it in our foreach chain. */
	}

	vdev = spdk_vhost_dev_next(ctx->vdev_id);
	spdk_vhost_external_event_foreach_continue(vdev, ctx->cb_fn, arg2);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);

	free(ctx);
}

static int
spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
		      unsigned timeout_sec, const char *errmsg)
{
	struct spdk_vhost_dev_event_ctx ev_ctx = {0};
	struct spdk_event *ev;
	struct timespec timeout;
	int rc;

	rc = sem_init(&ev_ctx.sem, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n");
		return -errno;
	}

	ev_ctx.vdev = vdev;
	ev_ctx.cb_fn = cb_fn;
	ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
	assert(ev);
	spdk_event_call(ev);

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;

	rc = sem_timedwait(&ev_ctx.sem, &timeout);
	if (rc != 0) {
		SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
		sem_wait(&ev_ctx.sem);
	}

	sem_destroy(&ev_ctx.sem);
	return ev_ctx.response;
}
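
/*
 * Note: spdk_vhost_event_send() above runs cb_fn on the controller's lcore and
 * blocks on a semaphore until the callback signals completion through
 * spdk_vhost_dev_backend_event_done(). start_device()/stop_device() below use
 * it with a 3-second timeout, while spdk_vhost_event_async_send() is the
 * non-blocking variant used for external events.
 */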

static int
spdk_vhost_event_async_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn, void *arg,
			    bool foreach)
{
	struct spdk_vhost_dev_event_ctx *ev_ctx;
	struct spdk_event *ev;
	spdk_event_fn fn;

	ev_ctx = calloc(1, sizeof(*ev_ctx));
	if (ev_ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc vhost event.\n");
		return -ENOMEM;
	}

	ev_ctx->vdev = vdev;
	ev_ctx->vdev_id = vdev->id;
	ev_ctx->cb_fn = cb_fn;

	fn = foreach ? spdk_vhost_event_async_foreach_fn : spdk_vhost_event_async_fn;
	ev = spdk_event_allocate(ev_ctx->vdev->lcore, fn, ev_ctx, arg);
	assert(ev);
	spdk_event_call(ev);

	return 0;
}

static void
stop_device(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct rte_vhost_vring *q;
	int rc;
	uint16_t i;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = spdk_vhost_dev_find_by_vid(vid);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to stop.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	if (vdev->lcore == -1) {
		SPDK_ERRLOG("Controller %s is not loaded.\n", vdev->name);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	rc = spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device");
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	for (i = 0; i < vdev->max_queues; i++) {
		q = &vdev->virtqueue[i].vring;
		if (q->desc == NULL) {
			continue;
		}
		rte_vhost_set_vhost_vring_last_idx(vdev->vid, i, q->last_avail_idx, q->last_used_idx);
	}

	spdk_vhost_dev_mem_unregister(vdev);
	free(vdev->mem);
	spdk_vhost_free_reactor(vdev->lcore);
	vdev->lcore = -1;
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

static int
start_device(int vid)
{
	struct spdk_vhost_dev *vdev;
	int rc = -1;
	uint16_t i;

	pthread_mutex_lock(&g_spdk_vhost_mutex);

	vdev = spdk_vhost_dev_find_by_vid(vid);
	if (vdev == NULL) {
		SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid);
		goto out;
	}

	if (vdev->lcore != -1) {
		SPDK_ERRLOG("Controller %s already loaded.\n", vdev->name);
		goto out;
	}

	vdev->max_queues = 0;
	memset(vdev->virtqueue, 0, sizeof(vdev->virtqueue));
	for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) {
		if (rte_vhost_get_vhost_vring(vid, i, &vdev->virtqueue[i].vring)) {
			continue;
		}

		if (vdev->virtqueue[i].vring.desc == NULL ||
		    vdev->virtqueue[i].vring.size == 0) {
			continue;
		}

		/* Disable notifications. */
		if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) {
			SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i);
			goto out;
		}

		vdev->max_queues = i + 1;
	}

	if (rte_vhost_get_negotiated_features(vid, &vdev->negotiated_features) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
		goto out;
	}

	if (rte_vhost_get_mem_table(vid, &vdev->mem) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
		goto out;
	}

	/*
	 * Not entirely sure yet, but this looks like some kind of QEMU bug: guest IO
	 * might be frozen without kicking all queues after live-migration. It looks like
	 * the previous vhost instance failed to effectively deliver all interrupts before
	 * the GET_VRING_BASE message. This shouldn't harm the guest, since spurious
	 * interrupts should be ignored by the guest virtio driver.
	 *
	 * Tested on QEMU 2.10.91 and 2.11.50.
	 */
	for (i = 0; i < vdev->max_queues; i++) {
		if (vdev->virtqueue[i].vring.callfd != -1) {
			eventfd_write(vdev->virtqueue[i].vring.callfd, (eventfd_t)1);
		}
	}

	vdev->lcore = spdk_vhost_allocate_reactor(vdev->cpumask);
	spdk_vhost_dev_mem_register(vdev);
	rc = spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device");
	if (rc != 0) {
		spdk_vhost_dev_mem_unregister(vdev);
		free(vdev->mem);
		spdk_vhost_free_reactor(vdev->lcore);
		vdev->lcore = -1;
	}

out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

static int
get_config(int vid, uint8_t *config, uint32_t len)
{
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = spdk_vhost_dev_find_by_vid(vid);
	if (vdev == NULL) {
		SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid);
		goto out;
	}

	if (vdev->backend->vhost_get_config) {
		rc = vdev->backend->vhost_get_config(vdev, config, len);
	}

out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

static int
set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
{
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = spdk_vhost_dev_find_by_vid(vid);
	if (vdev == NULL) {
		SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid);
		goto out;
	}

	if (vdev->backend->vhost_set_config) {
		rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags);
	}

out:
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return rc;
}

int
spdk_vhost_set_socket_path(const char *basename)
{
	int ret;

	if (basename && strlen(basename) > 0) {
		ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename);
		if (ret <= 0) {
			return -EINVAL;
		}
		if ((size_t)ret >= sizeof(dev_dirname) - 2) {
			SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
			return -EINVAL;
		}

		if (dev_dirname[ret - 1] != '/') {
			dev_dirname[ret] = '/';
			dev_dirname[ret + 1] = '\0';
		}
	}

	return 0;
}
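
/*
 * Note (illustrative): with spdk_vhost_set_socket_path() above, passing e.g.
 * "/var/tmp" (an example path, not one mandated by this code) leaves
 * dev_dirname as "/var/tmp/", so a controller named "vhost.0" registered
 * later would get the socket path "/var/tmp/vhost.0" built in
 * spdk_vhost_dev_register().
 */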

static void *
session_shutdown(void *arg)
{
	struct spdk_vhost_dev *vdev = NULL;

	TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
		rte_vhost_driver_unregister(vdev->path);
		vdev->registered = false;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n");
	spdk_event_call((struct spdk_event *)arg);
	return NULL;
}

void
spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	assert(vdev->backend->dump_info_json != NULL);
	vdev->backend->dump_info_json(vdev, w);
}

int
spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev)
{
	return vdev->backend->remove_device(vdev);
}

static int
new_connection(int vid)
{
	struct spdk_vhost_dev *vdev;
	char ifname[PATH_MAX];

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
		SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	vdev = spdk_vhost_dev_find(ifname);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	/* Since pollers are not running, it is safe not to use spdk_event here */
	if (vdev->vid != -1) {
		SPDK_ERRLOG("Device with vid %d is already connected.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return -1;
	}

	vdev->vid = vid;
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
	return 0;
}

static void
destroy_connection(int vid)
{
	struct spdk_vhost_dev *vdev;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = spdk_vhost_dev_find_by_vid(vid);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to destroy connection for.\n", vid);
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		return;
	}

	/* Since pollers are not running, it is safe not to use spdk_event here */
	vdev->vid = -1;
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

void
spdk_vhost_call_external_event(const char *ctrlr_name, spdk_vhost_event_fn fn, void *arg)
{
	struct spdk_vhost_dev *vdev;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = spdk_vhost_dev_find(ctrlr_name);

	if (vdev == NULL) {
		pthread_mutex_unlock(&g_spdk_vhost_mutex);
		fn(NULL, arg);
		return;
	}

	if (vdev->lcore == -1) {
		fn(vdev, arg);
	} else {
		spdk_vhost_event_async_send(vdev, fn, arg, false);
	}

	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

static void
spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
		spdk_vhost_event_fn fn, void *arg)
{
	if (vdev == NULL) {
		fn(NULL, arg);
		return;
	}

	while (vdev->lcore == -1) {
		fn(vdev, arg);
		vdev = spdk_vhost_dev_next(vdev->id);
		if (vdev == NULL) {
			fn(NULL, arg);
			return;
		}
	}

	spdk_vhost_event_async_send(vdev, fn, arg, true);
}

void
spdk_vhost_call_external_event_foreach(spdk_vhost_event_fn fn, void *arg)
{
	struct spdk_vhost_dev *vdev;

	pthread_mutex_lock(&g_spdk_vhost_mutex);
	vdev = TAILQ_FIRST(&g_spdk_vhost_devices);
	spdk_vhost_external_event_foreach_continue(vdev, fn, arg);
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

void
spdk_vhost_lock(void)
{
	pthread_mutex_lock(&g_spdk_vhost_mutex);
}

void
spdk_vhost_unlock(void)
{
	pthread_mutex_unlock(&g_spdk_vhost_mutex);
}

int
spdk_vhost_init(void)
{
	uint32_t last_core;
	int ret;

	last_core = spdk_env_get_last_core();
	g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t));
	if (!g_num_ctrlrs) {
		SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n",
			    last_core + 1);
		return -1;
	}

	ret = spdk_vhost_scsi_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost controllers\n");
		return -1;
	}

	ret = spdk_vhost_blk_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost block controllers\n");
		return -1;
	}

	ret = spdk_vhost_nvme_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
		return -1;
	}

	return 0;
}

static int
_spdk_vhost_fini_remove_vdev_cb(struct spdk_vhost_dev *vdev, void *arg)
{
	spdk_vhost_fini_cb fini_cb = arg;

	if (vdev != NULL) {
		spdk_vhost_dev_remove(vdev);
		return 0;
	}

	/* All devices are removed now. */
	free(g_num_ctrlrs);
	fini_cb();
	return 0;
}

static void
_spdk_vhost_fini(void *arg1, void *arg2)
{
	spdk_vhost_fini_cb fini_cb = arg1;

	spdk_vhost_call_external_event_foreach(_spdk_vhost_fini_remove_vdev_cb, fini_cb);
}

void
spdk_vhost_fini(spdk_vhost_fini_cb fini_cb)
{
	pthread_t tid;
	int rc;
	struct spdk_event *fini_ev;

	fini_ev = spdk_event_allocate(spdk_env_get_current_core(), _spdk_vhost_fini, fini_cb, NULL);

	/* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
	 * ops for stopping a device or removing a connection, we need to call it from
	 * a separate thread to avoid deadlock.
	 */
	rc = pthread_create(&tid, NULL, &session_shutdown, fini_ev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
		abort();
	}
	pthread_detach(tid);
}

struct spdk_vhost_write_config_json_ctx {
	struct spdk_json_write_ctx *w;
	struct spdk_event *done_ev;
};

static int
spdk_vhost_config_json_cb(struct spdk_vhost_dev *vdev, void *arg)
{
	struct spdk_vhost_write_config_json_ctx *ctx = arg;
	uint32_t delay_base_us;
	uint32_t iops_threshold;

	if (vdev == NULL) {
		spdk_json_write_array_end(ctx->w);
		spdk_event_call(ctx->done_ev);
		free(ctx);
		return 0;
	}

	vdev->backend->write_config_json(vdev, ctx->w);

	spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold);
	if (delay_base_us) {
		spdk_json_write_object_begin(ctx->w);
		spdk_json_write_named_string(ctx->w, "method", "set_vhost_controller_coalescing");

		spdk_json_write_named_object_begin(ctx->w, "params");
		spdk_json_write_named_string(ctx->w, "ctrlr", vdev->name);
		spdk_json_write_named_uint32(ctx->w, "delay_base_us", delay_base_us);
		spdk_json_write_named_uint32(ctx->w, "iops_threshold", iops_threshold);
		spdk_json_write_object_end(ctx->w);

		spdk_json_write_object_end(ctx->w);
	}

	return 0;
}

void
spdk_vhost_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev)
{
	struct spdk_vhost_write_config_json_ctx *ctx;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		spdk_event_call(done_ev);
		return;
	}

	ctx->w = w;
	ctx->done_ev = done_ev;

	spdk_json_write_array_begin(w);

	spdk_vhost_call_external_event_foreach(spdk_vhost_config_json_cb, ctx);
}

SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST)
SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING)