/*-
 *   BSD LICENSE
 *
 *   Copyright(c) Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/memory.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"

/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";

/* Thread performing all vhost management operations */
static struct spdk_thread *g_vhost_init_thread;

static spdk_vhost_fini_cb g_fini_cpl_cb;

/**
 * DPDK calls our callbacks synchronously but the work those callbacks
 * perform needs to be async. Luckily, all DPDK callbacks are called on
 * a DPDK-internal pthread, so we'll just wait on a semaphore in there.
 */
static sem_t g_dpdk_sem;

/** Return code for the current DPDK callback */
static int g_dpdk_response;

struct vhost_session_fn_ctx {
	/** Device pointer obtained before enqueuing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the session to send event to. */
	uint32_t vsession_id;

	/** User provided function to be executed on session's thread. */
	spdk_vhost_session_fn cb_fn;

	/**
	 * User provided function to be called on the init thread
	 * after iterating through all sessions.
	 */
	spdk_vhost_dev_fn cpl_fn;

	/** Custom user context */
	void *user_ctx;
};

static TAILQ_HEAD(, spdk_vhost_dev) g_vhost_devices = TAILQ_HEAD_INITIALIZER(g_vhost_devices);
static pthread_mutex_t g_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;
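
/*
 * Translate a guest physical address range to a host virtual address.
 * rte_vhost_va_from_guest_pa() shrinks newlen to the length of the mapping
 * that is contiguous in host virtual memory, so a buffer spanning multiple
 * guest memory regions is rejected here and NULL is returned. Callers that
 * can cope with split buffers use vhost_vring_desc_payload_to_iov() below,
 * which fills one iovec per contiguous chunk instead.
 */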
void *
vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
{
	void *vva;
	uint64_t newlen;

	newlen = len;
	vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
	if (newlen != len) {
		return NULL;
	}

	return vva;
}

static void
vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		   uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (vhost_vring_desc_is_wr(desc)) {
			/* To be honest, only pages really touched should be logged, but
			 * doing so would require tracking those changes in each backend.
			 * The backend will most likely touch all/most of those pages anyway,
			 * so for now let's assume we touched all pages passed to us as
			 * writable buffers. */
			rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
		}
		vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}

static void
vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
			  struct spdk_vhost_virtqueue *virtqueue,
			  uint16_t idx)
{
	uint64_t offset, len;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	if (spdk_unlikely(virtqueue->packed.packed_ring)) {
		offset = idx * sizeof(struct vring_packed_desc);
		len = sizeof(struct vring_packed_desc);
	} else {
		offset = offsetof(struct vring_used, ring[idx]);
		len = sizeof(virtqueue->vring.used->ring[idx]);
	}

	rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
}

static void
vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
			 struct spdk_vhost_virtqueue *virtqueue)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, idx);
	len = sizeof(virtqueue->vring.used->idx);
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}
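
/*
 * Note on the index arithmetic below: avail->idx and last_avail_idx are
 * free-running 16-bit counters, so "avail_idx - last_idx" gives the number of
 * new entries even across a 65536 wrap-around, and ring slots are addressed
 * with "index & (size - 1)", which relies on split rings having a power-of-two
 * size (as the virtio spec requires). Illustrative example: with size 256,
 * last_idx 65535 and avail_idx 1, count is 2 and the entries sit in slots 255
 * and 0.
 */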
/*
 * Get available requests from avail ring.
 */
uint16_t
vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
			uint16_t reqs_len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_avail *avail = vring->avail;
	uint16_t size_mask = vring->size - 1;
	uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
	uint16_t count, i;

	count = avail_idx - last_idx;
	if (spdk_likely(count == 0)) {
		return 0;
	}

	if (spdk_unlikely(count > vring->size)) {
		/* TODO: the queue is unrecoverably broken and should be marked so.
		 * For now we will fail silently and report there are no new avail entries.
		 */
		return 0;
	}

	count = spdk_min(count, reqs_len);
	virtqueue->last_avail_idx += count;
	for (i = 0; i < count; i++) {
		reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      last_idx, avail_idx, count);

	return count;
}

static bool
vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}

static bool
vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
}
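
/*
 * An indirect descriptor's addr/len point at a guest-physical table of
 * descriptors rather than at request data, so its element count is simply
 * len / sizeof(descriptor). vhost_vq_get_desc() hands back either that
 * indirect table or the virtqueue's own descriptor array as
 * (desc_table, desc_table_size); the packed variant returns a NULL table for
 * non-indirect chains, which are walked via the F_NEXT flag instead.
 */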
int
vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		  uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
		  uint32_t *desc_table_size)
{
	if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
		return -1;
	}

	*desc = &virtqueue->vring.desc[req_idx];

	if (vhost_vring_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(**desc);
		*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
					       sizeof(**desc) * *desc_table_size);
		*desc = *desc_table;
		if (*desc == NULL) {
			return -1;
		}

		return 0;
	}

	*desc_table = virtqueue->vring.desc;
	*desc_table_size = virtqueue->vring.size;

	return 0;
}

int
vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
			 struct spdk_vhost_virtqueue *virtqueue,
			 uint16_t req_idx, struct vring_packed_desc **desc,
			 struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
{
	*desc = &virtqueue->vring.desc_packed[req_idx];

	/* In a packed ring, when the desc is non-indirect we get the next desc
	 * by checking (desc->flags & VRING_DESC_F_NEXT) != 0. When the desc is
	 * indirect we get the next desc by idx and desc_table_size. This is
	 * different from the split ring.
	 */
	if (vhost_vring_packed_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(struct vring_packed_desc);
		*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
					       (*desc)->len);
		*desc = *desc_table;
		if (spdk_unlikely(*desc == NULL)) {
			return -1;
		}
	} else {
		*desc_table = NULL;
		*desc_table_size = 0;
	}

	return 0;
}

int
vhost_vq_used_signal(struct spdk_vhost_session *vsession,
		     struct spdk_vhost_virtqueue *virtqueue)
{
	if (virtqueue->used_req_cnt == 0) {
		return 0;
	}

	virtqueue->req_cnt += virtqueue->used_req_cnt;
	virtqueue->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx);

	if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
		/* interrupt signalled */
		return 1;
	} else {
		/* interrupt not signalled */
		return 0;
	}
}

static void
check_session_io_stats(struct spdk_vhost_session *vsession, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
	uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
	int32_t irq_delay;
	uint32_t req_cnt;
	uint16_t q_idx;

	if (now < vsession->next_stats_check_time) {
		return;
	}

	vsession->next_stats_check_time = now + vsession->stats_check_interval;
	for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
		virtqueue = &vsession->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}

static inline bool
vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
{
	if (spdk_unlikely(vq->packed.packed_ring)) {
		if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
			return true;
		}
	} else {
		if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
			return true;
		}
	}

	return false;
}

void
vhost_session_used_signal(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vsession->coalescing_delay_time_base == 0) {
		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			if (virtqueue->vring.desc == NULL) {
				continue;
			}

			if (vhost_vq_event_is_suppressed(virtqueue)) {
				continue;
			}

			vhost_vq_used_signal(vsession, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		check_session_io_stats(vsession, now);

		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			/* No need for event right now */
			if (now < virtqueue->next_event_time) {
				continue;
			}

			if (vhost_vq_event_is_suppressed(virtqueue)) {
				continue;
			}

			if (!vhost_vq_used_signal(vsession, virtqueue)) {
				continue;
			}

			/* Syscall is quite long so update time */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}
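
/*
 * Interrupt coalescing parameters are stored on the device in user-facing
 * units (microseconds of delay, IOPS threshold) and converted here into the
 * units used on the hot path: timer ticks per interrupt delay and requests
 * per stats check interval. As an illustrative example (not a recommendation),
 * with a 2 GHz tick rate and SPDK_VHOST_STATS_CHECK_INTERVAL_MS equal to
 * 10 ms, delay_base_us = 50 and iops_threshold = 60000 translate to
 * coalescing_delay_time_base = 100000 ticks and
 * coalescing_io_rate_threshold = 600 requests per interval.
 */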
static int
vhost_session_set_coalescing(struct spdk_vhost_dev *vdev,
			     struct spdk_vhost_session *vsession, void *ctx)
{
	vsession->coalescing_delay_time_base =
		vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
	vsession->coalescing_io_rate_threshold =
		vdev->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
	return 0;
}

static int
vhost_dev_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			 uint32_t iops_threshold)
{
	uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
	uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;

	if (delay_time_base >= UINT32_MAX) {
		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
		return -EINVAL;
	} else if (io_rate == 0) {
		SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
			    1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
		return -EINVAL;
	}

	vdev->coalescing_delay_us = delay_base_us;
	vdev->coalescing_iops_threshold = iops_threshold;
	return 0;
}

int
spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			  uint32_t iops_threshold)
{
	int rc;

	rc = vhost_dev_set_coalescing(vdev, delay_base_us, iops_threshold);
	if (rc != 0) {
		return rc;
	}

	vhost_dev_foreach_session(vdev, vhost_session_set_coalescing, NULL, NULL);
	return 0;
}

void
spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
			  uint32_t *iops_threshold)
{
	if (delay_base_us) {
		*delay_base_us = vdev->coalescing_delay_us;
	}

	if (iops_threshold) {
		*iops_threshold = vdev->coalescing_iops_threshold;
	}
}
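
/*
 * Illustrative usage of the coalescing API (the controller name "vhost.0" and
 * the values below are made up for this example; the vhost mutex is expected
 * to be held while the device list is accessed):
 *
 *	spdk_vhost_lock();
 *	vdev = spdk_vhost_dev_find("vhost.0");
 *	if (vdev != NULL) {
 *		rc = spdk_vhost_set_coalescing(vdev, 50, 60000);
 *	}
 *	spdk_vhost_unlock();
 */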
/*
 * Enqueue id and len to used ring.
 */
void
vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
			   struct spdk_vhost_virtqueue *virtqueue,
			   uint16_t id, uint32_t len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_used *used = vring->used;
	uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
	uint16_t vq_idx = virtqueue->vring_idx;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);

	vhost_log_req_desc(vsession, virtqueue, id);

	virtqueue->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;

	/* Ensure the used ring is updated before we log it or increment used->idx. */
	spdk_smp_wmb();

	rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);

	vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
	*(volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
	vhost_log_used_vring_idx(vsession, virtqueue);

	rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);

	virtqueue->used_req_cnt++;
}

void
vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
			     struct spdk_vhost_virtqueue *virtqueue,
			     uint16_t num_descs, uint16_t buffer_id,
			     uint32_t length)
{
	struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
	bool used, avail;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - RING: buffer_id=%"PRIu16"\n",
		      virtqueue - vsession->virtqueue, buffer_id);

	/* When the descriptor is used, the avail flag and the used flag in the
	 * descriptor are equal and the used flag value == used_wrap_counter.
	 */
	used = !!(desc->flags & VRING_DESC_F_USED);
	avail = !!(desc->flags & VRING_DESC_F_AVAIL);
	if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
		SPDK_ERRLOG("descriptor has been used before\n");
		return;
	}

	/* In a used descriptor, addr is unused and len specifies the buffer length
	 * that has been written to by the device.
	 */
	desc->addr = 0;
	desc->len = length;

	/* This bit specifies whether any data has been written by the device */
	if (length != 0) {
		desc->flags |= VRING_DESC_F_WRITE;
	}

	/* Buffer ID is included in the last descriptor in the list.
	 * The driver needs to keep track of the size of the list corresponding
	 * to each buffer ID.
	 */
	desc->id = buffer_id;

	/* A device MUST NOT make the descriptor used before buffer_id is
	 * written to the descriptor.
	 */
	spdk_smp_wmb();
	/* To mark a desc as used, the device sets the F_USED bit in flags to match
	 * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
	 * match the same value.
	 */
	if (virtqueue->packed.used_phase) {
		desc->flags |= VRING_DESC_F_AVAIL_USED;
	} else {
		desc->flags &= ~VRING_DESC_F_AVAIL_USED;
	}

	vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
	virtqueue->last_used_idx += num_descs;
	if (virtqueue->last_used_idx >= virtqueue->vring.size) {
		virtqueue->last_used_idx -= virtqueue->vring.size;
		virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
	}

	virtqueue->used_req_cnt++;
}
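
/*
 * Packed ring availability is tracked with wrap counters ("phases") rather
 * than free-running indices. The driver toggles its avail wrap counter each
 * time it wraps past the end of the ring and encodes it in VRING_DESC_F_AVAIL
 * (with VRING_DESC_F_USED normally set to the inverse); the device mirrors
 * this with packed.avail_phase / packed.used_phase. A descriptor at
 * last_avail_idx is therefore available exactly when its F_AVAIL bit equals
 * our avail_phase, which is the single check performed below.
 */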
bool
vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
{
	uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;

	/* To mark a desc as available, the driver sets the F_AVAIL bit in flags
	 * to match the internal avail wrap counter. It also sets the F_USED bit to
	 * match the inverse value but it's not mandatory.
	 */
	return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
}

bool
vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}

int
vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
				 struct spdk_vhost_virtqueue *vq,
				 struct vring_packed_desc *desc_table,
				 uint32_t desc_table_size)
{
	if (desc_table != NULL) {
		/* When desc_table isn't NULL, the chain is indirect and we get the
		 * next desc from req_idx and desc_table_size. *desc set to NULL means
		 * we reached the last desc of this request.
		 */
		(*req_idx)++;
		if (*req_idx < desc_table_size) {
			*desc = &desc_table[*req_idx];
		} else {
			*desc = NULL;
		}
	} else {
		/* When desc_table is NULL, the chain is non-indirect and we get the
		 * next desc from req_idx and the F_NEXT flag. *desc set to NULL means
		 * we reached the last desc of this request. When a new desc is
		 * returned, req_idx is updated as well.
		 */
		if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
			*desc = NULL;
			return 0;
		}

		*req_idx = (*req_idx + 1) % vq->vring.size;
		*desc = &vq->vring.desc_packed[*req_idx];
	}

	return 0;
}

static int
vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
				uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
{
	uintptr_t vva;
	uint64_t len;

	do {
		if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
			SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
			return -1;
		}
		len = remaining;
		vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
		if (vva == 0 || len == 0) {
			SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
			return -1;
		}
		iov[*iov_index].iov_base = (void *)vva;
		iov[*iov_index].iov_len = len;
		remaining -= len;
		payload += len;
		(*iov_index)++;
	} while (remaining);

	return 0;
}

int
vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
			       uint16_t *iov_index, const struct vring_packed_desc *desc)
{
	return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
					       desc->addr, desc->len);
}
/* 1. Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
 * 2. Update vq->last_avail_idx to point to the next available desc chain.
 * 3. Toggle the avail_wrap_counter if last_avail_idx wraps around.
 */
uint16_t
vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
				      uint16_t *num_descs)
{
	struct vring_packed_desc *desc;
	uint16_t desc_head = req_idx;

	*num_descs = 1;

	desc = &vq->vring.desc_packed[req_idx];
	if (!vhost_vring_packed_desc_is_indirect(desc)) {
		while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
			req_idx = (req_idx + 1) % vq->vring.size;
			desc = &vq->vring.desc_packed[req_idx];
			(*num_descs)++;
		}
	}

	/* Queue Size doesn't have to be a power of 2.
	 * The device maintains last_avail_idx, so we make sure
	 * the value stays valid (0 ~ vring.size - 1).
	 */
	vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
	if (vq->last_avail_idx < desc_head) {
		vq->packed.avail_phase = !vq->packed.avail_phase;
	}

	return desc->id;
}

int
vhost_vring_desc_get_next(struct vring_desc **desc,
			  struct vring_desc *desc_table, uint32_t desc_table_size)
{
	struct vring_desc *old_desc = *desc;
	uint16_t next_idx;

	if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
		*desc = NULL;
		return 0;
	}

	next_idx = old_desc->next;
	if (spdk_unlikely(next_idx >= desc_table_size)) {
		*desc = NULL;
		return -1;
	}

	*desc = &desc_table[next_idx];
	return 0;
}

bool
vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

int
vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
			uint16_t *iov_index, const struct vring_desc *desc)
{
	return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
					       desc->addr, desc->len);
}

static struct spdk_vhost_session *
vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
{
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->id == id) {
			return vsession;
		}
	}

	return NULL;
}

struct spdk_vhost_session *
vhost_session_find_by_vid(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
			if (vsession->vid == vid) {
				return vsession;
			}
		}
	}

	return NULL;
}
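
/*
 * Illustrative iteration over all registered controllers (callers such as
 * spdk_vhost_config_json() further down do this while holding the vhost
 * mutex):
 *
 *	for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
 *	     vdev = spdk_vhost_dev_next(vdev)) {
 *		... inspect vdev ...
 *	}
 */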
struct spdk_vhost_dev *
spdk_vhost_dev_next(struct spdk_vhost_dev *vdev)
{
	if (vdev == NULL) {
		return TAILQ_FIRST(&g_vhost_devices);
	}

	return TAILQ_NEXT(vdev, tailq);
}

struct spdk_vhost_dev *
spdk_vhost_dev_find(const char *ctrlr_name)
{
	struct spdk_vhost_dev *vdev;
	size_t dev_dirname_len = strlen(dev_dirname);

	if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) {
		ctrlr_name += dev_dirname_len;
	}

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		if (strcmp(vdev->name, ctrlr_name) == 0) {
			return vdev;
		}
	}

	return NULL;
}

static int
vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc;

	if (cpumask == NULL) {
		return -1;
	}

	if (mask == NULL) {
		spdk_cpuset_copy(cpumask, spdk_app_get_core_mask());
		return 0;
	}

	rc = spdk_app_parse_core_mask(mask, cpumask);
	if (rc < 0) {
		SPDK_ERRLOG("invalid cpumask %s\n", mask);
		return -1;
	}

	if (spdk_cpuset_count(cpumask) == 0) {
		SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n",
			    spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -1;
	}

	return 0;
}

static void
vhost_dev_thread_exit(void *arg1)
{
	spdk_thread_exit(spdk_get_thread());
}

int
vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
		   const struct spdk_vhost_dev_backend *backend)
{
	char path[PATH_MAX];
	struct spdk_cpuset cpumask = {};
	int rc;

	assert(vdev);
	if (name == NULL) {
		SPDK_ERRLOG("Can't register controller with no name\n");
		return -EINVAL;
	}

	if (vhost_parse_core_mask(mask_str, &cpumask) != 0) {
		SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n",
			    mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -EINVAL;
	}

	if (spdk_vhost_dev_find(name)) {
		SPDK_ERRLOG("vhost controller %s already exists.\n", name);
		return -EEXIST;
	}

	if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) {
		SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname,
			    name);
		return -EINVAL;
	}

	vdev->name = strdup(name);
	vdev->path = strdup(path);
	if (vdev->name == NULL || vdev->path == NULL) {
		rc = -EIO;
		goto out;
	}

	vdev->thread = spdk_thread_create(vdev->name, &cpumask);
	if (vdev->thread == NULL) {
		SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
		rc = -EIO;
		goto out;
	}

	vdev->registered = true;
	vdev->backend = backend;
	TAILQ_INIT(&vdev->vsessions);

	vhost_dev_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
				 SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);

	if (vhost_register_unix_socket(path, name, vdev->virtio_features, vdev->disabled_features,
				       vdev->protocol_features)) {
		spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
		rc = -EIO;
		goto out;
	}

	TAILQ_INSERT_TAIL(&g_vhost_devices, vdev, tailq);

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name);
	return 0;

out:
	free(vdev->name);
	free(vdev->path);
	return rc;
}

int
vhost_dev_unregister(struct spdk_vhost_dev *vdev)
{
	if (!TAILQ_EMPTY(&vdev->vsessions)) {
		SPDK_ERRLOG("Controller %s still has valid connections.\n", vdev->name);
		return -EBUSY;
	}

	if (vdev->registered && vhost_driver_unregister(vdev->path) != 0) {
		SPDK_ERRLOG("Could not unregister controller %s with vhost library\n"
			    "Check if domain socket %s still exists\n",
			    vdev->name, vdev->path);
		return -EIO;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name);

	spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);

	free(vdev->name);
	free(vdev->path);
	TAILQ_REMOVE(&g_vhost_devices, vdev, tailq);
	return 0;
}

const char *
spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return vdev->name;
}

const struct spdk_cpuset *
spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return spdk_thread_get_cpumask(vdev->thread);
}
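
/*
 * The functions below implement the synchronization between the rte_vhost
 * (DPDK) pthread and SPDK threads: the DPDK callback blocks on g_dpdk_sem
 * while the actual work runs asynchronously on an SPDK thread, and
 * vhost_session_start_done()/vhost_session_stop_done() post the semaphore
 * with the result stored in g_dpdk_response.
 */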
static void
wait_for_semaphore(int timeout_sec, const char *errmsg)
{
	struct timespec timeout;
	int rc;

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;
	rc = sem_timedwait(&g_dpdk_sem, &timeout);
	if (rc != 0) {
		SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
		sem_wait(&g_dpdk_sem);
	}
}

static void
vhost_session_cb_done(int rc)
{
	g_dpdk_response = rc;
	sem_post(&g_dpdk_sem);
}

void
vhost_session_start_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->started = true;

		assert(vsession->vdev->active_session_num < UINT32_MAX);
		vsession->vdev->active_session_num++;
	}

	vhost_session_cb_done(response);
}

void
vhost_session_stop_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->started = false;

		assert(vsession->vdev->active_session_num > 0);
		vsession->vdev->active_session_num--;
	}

	vhost_session_cb_done(response);
}

static void
vhost_event_cb(void *arg1)
{
	struct vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), vhost_event_cb, arg1);
		return;
	}

	vsession = vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
	ctx->cb_fn(ctx->vdev, vsession, NULL);
	pthread_mutex_unlock(&g_vhost_mutex);
}
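
/*
 * Send an event to the session's SPDK thread and block until it completes.
 * The caller holds the vhost mutex (the unlock below pairs with that); the
 * mutex is dropped while this thread sleeps on g_dpdk_sem so that
 * vhost_event_cb() can take it on the device thread, and it is re-acquired
 * once the callback has signalled completion through vhost_session_*_done().
 */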
int
vhost_session_send_event(struct spdk_vhost_session *vsession,
			 spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
			 const char *errmsg)
{
	struct vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_vhost_dev *vdev = vsession->vdev;

	ev_ctx.vdev = vdev;
	ev_ctx.vsession_id = vsession->id;
	ev_ctx.cb_fn = cb_fn;

	spdk_thread_send_msg(vdev->thread, vhost_event_cb, &ev_ctx);

	pthread_mutex_unlock(&g_vhost_mutex);
	wait_for_semaphore(timeout_sec, errmsg);
	pthread_mutex_lock(&g_vhost_mutex);

	return g_dpdk_response;
}

static void
foreach_session_finish_cb(void *arg1)
{
	struct vhost_session_fn_ctx *ev_ctx = arg1;
	struct spdk_vhost_dev *vdev = ev_ctx->vdev;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(),
				     foreach_session_finish_cb, arg1);
		return;
	}

	assert(vdev->pending_async_op_num > 0);
	vdev->pending_async_op_num--;
	if (ev_ctx->cpl_fn != NULL) {
		ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
	}

	pthread_mutex_unlock(&g_vhost_mutex);
	free(ev_ctx);
}

static void
foreach_session(void *arg1)
{
	struct vhost_session_fn_ctx *ev_ctx = arg1;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev = ev_ctx->vdev;
	int rc;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
		return;
	}

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->initialized) {
			rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
			if (rc < 0) {
				goto out;
			}
		}
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);

	spdk_thread_send_msg(g_vhost_init_thread, foreach_session_finish_cb, arg1);
}

void
vhost_dev_foreach_session(struct spdk_vhost_dev *vdev,
			  spdk_vhost_session_fn fn,
			  spdk_vhost_dev_fn cpl_fn,
			  void *arg)
{
	struct vhost_session_fn_ctx *ev_ctx;

	ev_ctx = calloc(1, sizeof(*ev_ctx));
	if (ev_ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc vhost event.\n");
		assert(false);
		return;
	}

	ev_ctx->vdev = vdev;
	ev_ctx->cb_fn = fn;
	ev_ctx->cpl_fn = cpl_fn;
	ev_ctx->user_ctx = arg;

	assert(vdev->pending_async_op_num < UINT32_MAX);
	vdev->pending_async_op_num++;

	spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
}

static int
_stop_session(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_virtqueue *q;
	int rc;
	uint16_t i;

	rc = vdev->backend->stop_session(vsession);
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return rc;
	}

	for (i = 0; i < vsession->max_queues; i++) {
		q = &vsession->virtqueue[i];

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc == NULL) {
			continue;
		}

		/* Packed virtqueues support up to 2^15 entries each,
		 * so the leftmost bit can be used as the wrap counter.
		 */
		if (q->packed.packed_ring) {
			q->last_avail_idx = q->last_avail_idx |
					    ((uint16_t)q->packed.avail_phase << 15);
			q->last_used_idx = q->last_used_idx |
					   ((uint16_t)q->packed.used_phase << 15);
		}

		rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
	}

	vhost_session_mem_unregister(vsession->mem);
	free(vsession->mem);

	return 0;
}

int
vhost_stop_device_cb(int vid)
{
	struct spdk_vhost_session *vsession;
	int rc;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EINVAL;
	}

	if (!vsession->started) {
		/* already stopped, nothing to do */
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EALREADY;
	}

	rc = _stop_session(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);

	return rc;
}
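
/*
 * Start a session: read the negotiated features to detect a packed ring, set
 * up each virtqueue (vring pointers, inflight info, saved indices and wrap
 * counters, notification suppression), map the guest memory table, kick every
 * queue once (see the comment further down), and finally hand the session to
 * the backend via start_session().
 */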
int
vhost_start_device_cb(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	int rc = -1;
	uint16_t i;
	bool packed_ring;

	pthread_mutex_lock(&g_vhost_mutex);

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vsession->started) {
		/* already started, nothing to do */
		rc = 0;
		goto out;
	}

	if (vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
		goto out;
	}

	packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);

	vsession->max_queues = 0;
	memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue));
	for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) {
		struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];

		q->vring_idx = -1;
		if (rte_vhost_get_vhost_vring(vid, i, &q->vring)) {
			continue;
		}
		q->vring_idx = i;
		rte_vhost_get_vhost_ring_inflight(vid, i, &q->vring_inflight);

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc == NULL || q->vring.size == 0) {
			continue;
		}

		if (rte_vhost_get_vring_base(vsession->vid, i, &q->last_avail_idx, &q->last_used_idx)) {
			q->vring.desc = NULL;
			continue;
		}

		if (packed_ring) {
			/* Packed virtqueues support up to 2^15 entries each,
			 * so the leftmost bit can be used as the wrap counter.
			 */
			q->packed.avail_phase = q->last_avail_idx >> 15;
			q->last_avail_idx = q->last_avail_idx & 0x7FFF;
			q->packed.used_phase = q->last_used_idx >> 15;
			q->last_used_idx = q->last_used_idx & 0x7FFF;

			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
		} else {
			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
		}

		q->packed.packed_ring = packed_ring;
		vsession->max_queues = i + 1;
	}

	if (vhost_get_mem_table(vid, &vsession->mem) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
		goto out;
	}

	/*
	 * Not sure right now, but this looks like some kind of QEMU bug: guest I/O
	 * might be frozen without kicking all queues after live migration. It looks
	 * like the previous vhost instance failed to effectively deliver all
	 * interrupts before the GET_VRING_BASE message. This shouldn't harm the
	 * guest since spurious interrupts should be ignored by the guest virtio
	 * driver.
	 *
	 * Tested on QEMU 2.10.91 and 2.11.50.
	 */
	for (i = 0; i < vsession->max_queues; i++) {
		struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc != NULL && q->vring.size > 0) {
			rte_vhost_vring_call(vsession->vid, q->vring_idx);
		}
	}

	vhost_session_set_coalescing(vdev, vsession, NULL);
	vhost_session_mem_register(vsession->mem);
	vsession->initialized = true;
	rc = vdev->backend->start_session(vsession);
	if (rc != 0) {
		vhost_session_mem_unregister(vsession->mem);
		free(vsession->mem);
		goto out;
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
int
vhost_get_config_cb(int vid, uint8_t *config, uint32_t len)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_get_config) {
		rc = vdev->backend->vhost_get_config(vdev, config, len);
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}

int
vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_set_config) {
		rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags);
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}
#endif
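
/*
 * Set the directory in which vhost-user sockets are created. A trailing '/'
 * is appended if missing, and vhost_dev_register() later builds the socket
 * path as "<dev_dirname><controller name>". Illustrative example: passing
 * "/var/tmp" stores "/var/tmp/", so a controller named "vhost.0" would get
 * the socket "/var/tmp/vhost.0".
 */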
int
spdk_vhost_set_socket_path(const char *basename)
{
	int ret;

	if (basename && strlen(basename) > 0) {
		ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename);
		if (ret <= 0) {
			return -EINVAL;
		}
		if ((size_t)ret >= sizeof(dev_dirname) - 2) {
			SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
			return -EINVAL;
		}

		if (dev_dirname[ret - 1] != '/') {
			dev_dirname[ret] = '/';
			dev_dirname[ret + 1] = '\0';
		}
	}

	return 0;
}

void
vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	assert(vdev->backend->dump_info_json != NULL);
	vdev->backend->dump_info_json(vdev, w);
}

int
spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev)
{
	if (vdev->pending_async_op_num) {
		return -EBUSY;
	}

	return vdev->backend->remove_device(vdev);
}

int
vhost_new_connection_cb(int vid, const char *ifname)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	pthread_mutex_lock(&g_vhost_mutex);

	vdev = spdk_vhost_dev_find(ifname);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -1;
	}

	/* We expect sessions inside vdev->vsessions to be sorted in ascending
	 * order with regard to vsession->id. For now we always set id = vsessions_num++
	 * and append each session to the very end of the vsessions list.
	 * This is required for vhost_dev_foreach_session() to work.
	 */
	if (vdev->vsessions_num == UINT_MAX) {
		assert(false);
		return -EINVAL;
	}

	if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
			   vdev->backend->session_ctx_size)) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&g_vhost_mutex);
		return -1;
	}
	memset(vsession, 0, sizeof(*vsession) + vdev->backend->session_ctx_size);

	vsession->vdev = vdev;
	vsession->vid = vid;
	vsession->id = vdev->vsessions_num++;
	vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
	if (vsession->name == NULL) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&g_vhost_mutex);
		free(vsession);
		return -1;
	}
	vsession->started = false;
	vsession->initialized = false;
	vsession->next_stats_check_time = 0;
	vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
					 spdk_get_ticks_hz() / 1000UL;
	TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq);

	vhost_session_install_rte_compat_hooks(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);
	return 0;
}

int
vhost_destroy_connection_cb(int vid)
{
	struct spdk_vhost_session *vsession;
	int rc = 0;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EINVAL;
	}

	if (vsession->started) {
		rc = _stop_session(vsession);
	}

	TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq);
	free(vsession->name);
	free(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);

	return rc;
}
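
/*
 * g_vhost_mutex guards the global device list and the per-device session
 * lists. It is taken internally by the rte_vhost callbacks above and exposed
 * to API users below so that, for example, a device returned by
 * spdk_vhost_dev_find() cannot be removed while the caller is still using it.
 */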
void
spdk_vhost_lock(void)
{
	pthread_mutex_lock(&g_vhost_mutex);
}

int
spdk_vhost_trylock(void)
{
	return -pthread_mutex_trylock(&g_vhost_mutex);
}

void
spdk_vhost_unlock(void)
{
	pthread_mutex_unlock(&g_vhost_mutex);
}

void
spdk_vhost_init(spdk_vhost_init_cb init_cb)
{
	size_t len;
	int ret;

	g_vhost_init_thread = spdk_get_thread();
	assert(g_vhost_init_thread != NULL);

	if (dev_dirname[0] == '\0') {
		if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) {
			SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
			ret = -1;
			goto out;
		}

		len = strlen(dev_dirname);
		if (dev_dirname[len - 1] != '/') {
			dev_dirname[len] = '/';
			dev_dirname[len + 1] = '\0';
		}
	}

	ret = sem_init(&g_dpdk_sem, 0, 0);
	if (ret != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
		ret = -1;
		goto out;
	}

	ret = vhost_scsi_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost controllers\n");
		goto out;
	}

	ret = vhost_blk_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost block controllers\n");
		goto out;
	}

#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
	ret = vhost_nvme_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
		goto out;
	}
#endif

out:
	init_cb(ret);
}

static void
_spdk_vhost_fini(void *arg1)
{
	struct spdk_vhost_dev *vdev, *tmp;

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		tmp = spdk_vhost_dev_next(vdev);
		spdk_vhost_dev_remove(vdev);
		/* don't care if it fails, there's nothing we can do for now */
		vdev = tmp;
	}
	spdk_vhost_unlock();

	/* All devices are removed now. */
	sem_destroy(&g_dpdk_sem);

	g_fini_cpl_cb();
}

static void *
session_shutdown(void *arg)
{
	struct spdk_vhost_dev *vdev = NULL;

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		vhost_driver_unregister(vdev->path);
		vdev->registered = false;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n");
	spdk_thread_send_msg(g_vhost_init_thread, _spdk_vhost_fini, NULL);
	return NULL;
}

void
spdk_vhost_fini(spdk_vhost_fini_cb fini_cb)
{
	pthread_t tid;
	int rc;

	assert(spdk_get_thread() == g_vhost_init_thread);
	g_fini_cpl_cb = fini_cb;

	/* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
	 * ops for stopping a device or removing a connection, we need to call it from
	 * a separate thread to avoid deadlock.
	 */
	rc = pthread_create(&tid, NULL, &session_shutdown, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
		abort();
	}
	pthread_detach(tid);
}

void
spdk_vhost_config_json(struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_dev *vdev;
	uint32_t delay_base_us;
	uint32_t iops_threshold;

	spdk_json_write_array_begin(w);

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		vdev->backend->write_config_json(vdev, w);

		spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold);
		if (delay_base_us) {
			spdk_json_write_object_begin(w);
			spdk_json_write_named_string(w, "method", "vhost_controller_set_coalescing");

			spdk_json_write_named_object_begin(w, "params");
			spdk_json_write_named_string(w, "ctrlr", vdev->name);
			spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us);
			spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold);
			spdk_json_write_object_end(w);

			spdk_json_write_object_end(w);
		}
		vdev = spdk_vhost_dev_next(vdev);
	}
	spdk_vhost_unlock();

	spdk_json_write_array_end(w);
}

SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST)
SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING)