/*-
 *   BSD LICENSE
 *
 *   Copyright(c) Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/memory.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"

/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";

/* Thread performing all vhost management operations */
static struct spdk_thread *g_vhost_init_thread;

static spdk_vhost_fini_cb g_fini_cpl_cb;

/**
 * DPDK calls our callbacks synchronously but the work those callbacks
 * perform needs to be async. Luckily, all DPDK callbacks are called on
 * a DPDK-internal pthread, so we'll just wait on a semaphore in there.
 */
static sem_t g_dpdk_sem;

/** Return code for the current DPDK callback */
static int g_dpdk_response;

struct vhost_session_fn_ctx {
	/** Device pointer obtained before enqueuing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the session to send event to. */
	uint32_t vsession_id;

	/** User provided function to be executed on session's thread. */
	spdk_vhost_session_fn cb_fn;

	/**
	 * User provided function to be called on the init thread
	 * after iterating through all sessions.
	 */
	spdk_vhost_dev_fn cpl_fn;

	/** Custom user context */
	void *user_ctx;
};

static TAILQ_HEAD(, spdk_vhost_dev) g_vhost_devices = TAILQ_HEAD_INITIALIZER(
			g_vhost_devices);
static pthread_mutex_t g_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;

void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
{
	void *vva;
	uint64_t newlen;

	newlen = len;
	vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
	if (newlen != len) {
		return NULL;
	}

	return vva;
}

static void
vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		   uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (vhost_vring_desc_is_wr(desc)) {
			/* To be honest, only pages really touched should be logged, but
			 * doing so would require tracking those changes in each backend.
			 * Also, the backend will most likely touch all/most of those pages,
			 * so for now let's assume we touched all pages passed to us as
			 * writeable buffers. */
			rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
		}
		vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}

static void
vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
			  struct spdk_vhost_virtqueue *virtqueue,
			  uint16_t idx)
{
	uint64_t offset, len;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	if (spdk_unlikely(virtqueue->packed.packed_ring)) {
		offset = idx * sizeof(struct vring_packed_desc);
		len = sizeof(struct vring_packed_desc);
	} else {
		offset = offsetof(struct vring_used, ring[idx]);
		len = sizeof(virtqueue->vring.used->ring[idx]);
	}

	rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
}

static void
vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
			 struct spdk_vhost_virtqueue *virtqueue)
{
	uint64_t offset, len;
	uint16_t vq_idx;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	offset = offsetof(struct vring_used, idx);
	len = sizeof(virtqueue->vring.used->idx);
	vq_idx = virtqueue - vsession->virtqueue;

	rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
}

/*
 * Get available requests from avail ring.
 */
uint16_t
vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
			uint16_t reqs_len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_avail *avail = vring->avail;
	uint16_t size_mask = vring->size - 1;
	uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
	uint16_t count, i;

	count = avail_idx - last_idx;
	if (spdk_likely(count == 0)) {
		return 0;
	}

	if (spdk_unlikely(count > vring->size)) {
		/* TODO: the queue is unrecoverably broken and should be marked so.
		 * For now we will fail silently and report there are no new avail entries.
		 */
		return 0;
	}

	count = spdk_min(count, reqs_len);
	virtqueue->last_avail_idx += count;
	for (i = 0; i < count; i++) {
		reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
		      last_idx, avail_idx, count);

	return count;
}

static bool
vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}

static bool
vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
}

int
vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		  uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
		  uint32_t *desc_table_size)
{
	if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
		return -1;
	}

	*desc = &virtqueue->vring.desc[req_idx];

	if (vhost_vring_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(**desc);
		*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
					       sizeof(**desc) * *desc_table_size);
		*desc = *desc_table;
		if (*desc == NULL) {
			return -1;
		}

		return 0;
	}

	*desc_table = virtqueue->vring.desc;
	*desc_table_size = virtqueue->vring.size;

	return 0;
}

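/* Illustrative sketch (not part of the library): how a backend poller might
 * consume split-ring requests using the helpers in this file. The name
 * `handle_request` is hypothetical and error handling is omitted.
 *
 *	uint16_t reqs[32];
 *	uint16_t count, i, iovcnt;
 *	struct iovec iovs[SPDK_VHOST_IOVS_MAX];
 *	struct vring_desc *desc, *desc_table;
 *	uint32_t desc_table_size;
 *
 *	count = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
 *	for (i = 0; i < count; i++) {
 *		if (vhost_vq_get_desc(vsession, vq, reqs[i], &desc,
 *				      &desc_table, &desc_table_size) != 0) {
 *			continue;
 *		}
 *		iovcnt = 0;
 *		while (desc != NULL) {
 *			vhost_vring_desc_to_iov(vsession, iovs, &iovcnt, desc);
 *			vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
 *		}
 *		handle_request(reqs[i], iovs, iovcnt);
 *	}
 */
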
int
vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
			 struct spdk_vhost_virtqueue *virtqueue,
			 uint16_t req_idx, struct vring_packed_desc **desc,
			 struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
{
	*desc = &virtqueue->vring.desc_packed[req_idx];

	/* In a packed ring, when the desc is non-indirect we get the next desc
	 * by checking (desc->flags & VRING_DESC_F_NEXT) != 0. When the desc is
	 * indirect we get the next desc by index and desc_table_size. This
	 * differs from the split ring.
	 */
	if (vhost_vring_packed_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(struct vring_packed_desc);
		*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
					       (*desc)->len);
		*desc = *desc_table;
		if (spdk_unlikely(*desc == NULL)) {
			return -1;
		}
	} else {
		*desc_table = NULL;
		*desc_table_size = 0;
	}

	return 0;
}

int
vhost_vq_used_signal(struct spdk_vhost_session *vsession,
		     struct spdk_vhost_virtqueue *virtqueue)
{
	if (virtqueue->used_req_cnt == 0) {
		return 0;
	}

	virtqueue->req_cnt += virtqueue->used_req_cnt;
	virtqueue->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx);

	if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
		/* interrupt signalled */
		return 1;
	} else {
		/* interrupt not signalled */
		return 0;
	}
}

static void
check_session_io_stats(struct spdk_vhost_session *vsession, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
	uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
	int32_t irq_delay;
	uint32_t req_cnt;
	uint16_t q_idx;

	if (now < vsession->next_stats_check_time) {
		return;
	}

	vsession->next_stats_check_time = now + vsession->stats_check_interval;
	for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
		virtqueue = &vsession->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}

static inline bool
vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
{
	if (spdk_unlikely(vq->packed.packed_ring)) {
		if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
			return true;
		}
	} else {
		if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
			return true;
		}
	}

	return false;
}

void
vhost_session_used_signal(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vsession->coalescing_delay_time_base == 0) {
		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			if (virtqueue->vring.desc == NULL) {
				continue;
			}

			if (vhost_vq_event_is_suppressed(virtqueue)) {
				continue;
			}

			vhost_vq_used_signal(vsession, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		check_session_io_stats(vsession, now);

		for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
			virtqueue = &vsession->virtqueue[q_idx];

			/* No need for an event right now */
			if (now < virtqueue->next_event_time) {
				continue;
			}

			if (vhost_vq_event_is_suppressed(virtqueue)) {
				continue;
			}

			if (!vhost_vq_used_signal(vsession, virtqueue)) {
				continue;
			}

			/* The syscall takes a while, so refresh the time */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}

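/* Coalescing parameters are stored on the device in user-facing units
 * (microseconds of delay, IOPS threshold) and converted per session into
 * poller-friendly units: CPU ticks for the delay and I/Os per stats-check
 * interval for the threshold. As an illustration (numbers are only an
 * example, assuming a 2 GHz tick rate and a 10 ms stats interval):
 * delay_base_us = 100 becomes 200,000 ticks and iops_threshold = 60,000
 * becomes 600 I/Os per interval.
 */
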
static int
vhost_session_set_coalescing(struct spdk_vhost_dev *vdev,
			     struct spdk_vhost_session *vsession, void *ctx)
{
	vsession->coalescing_delay_time_base =
		vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
	vsession->coalescing_io_rate_threshold =
		vdev->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
	return 0;
}

static int
vhost_dev_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			 uint32_t iops_threshold)
{
	uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
	uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;

	if (delay_time_base >= UINT32_MAX) {
		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
		return -EINVAL;
	} else if (io_rate == 0) {
		SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
			    1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
		return -EINVAL;
	}

	vdev->coalescing_delay_us = delay_base_us;
	vdev->coalescing_iops_threshold = iops_threshold;
	return 0;
}

int
spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			  uint32_t iops_threshold)
{
	int rc;

	rc = vhost_dev_set_coalescing(vdev, delay_base_us, iops_threshold);
	if (rc != 0) {
		return rc;
	}

	vhost_dev_foreach_session(vdev, vhost_session_set_coalescing, NULL, NULL);
	return 0;
}

void
spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
			  uint32_t *iops_threshold)
{
	if (delay_base_us) {
		*delay_base_us = vdev->coalescing_delay_us;
	}

	if (iops_threshold) {
		*iops_threshold = vdev->coalescing_iops_threshold;
	}
}

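/* Illustrative sketch (not part of the library): a backend typically completes
 * a split-ring request by enqueuing it on the used ring and then letting the
 * coalescing logic decide when to interrupt the guest. The `task` fields below
 * are hypothetical.
 *
 *	vhost_vq_used_ring_enqueue(vsession, vq, task->req_idx, task->used_len);
 *	vhost_session_used_signal(vsession);
 */
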
/*
 * Enqueue id and len to used ring.
 */
void
vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
			   struct spdk_vhost_virtqueue *virtqueue,
			   uint16_t id, uint32_t len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_used *used = vring->used;
	uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
	uint16_t vq_idx = virtqueue->vring_idx;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);

	vhost_log_req_desc(vsession, virtqueue, id);

	virtqueue->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;

	/* Ensure the used ring is updated before we log it or increment used->idx. */
	spdk_smp_wmb();

	rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);

	vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
	* (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
	vhost_log_used_vring_idx(vsession, virtqueue);

	rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);

	virtqueue->used_req_cnt++;
}

void
vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
			     struct spdk_vhost_virtqueue *virtqueue,
			     uint16_t num_descs, uint16_t buffer_id,
			     uint32_t length)
{
	struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
	bool used, avail;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
		      "Queue %td - RING: buffer_id=%"PRIu16"\n",
		      virtqueue - vsession->virtqueue, buffer_id);

	/* When the descriptor has already been used, its avail flag and used
	 * flag are equal and the used flag matches used_wrap_counter.
	 */
	used = !!(desc->flags & VRING_DESC_F_USED);
	avail = !!(desc->flags & VRING_DESC_F_AVAIL);
	if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
		SPDK_ERRLOG("descriptor has been used before\n");
		return;
	}

	/* In a used descriptor, addr is unused and len specifies the buffer
	 * length that has been written to by the device.
	 */
	desc->addr = 0;
	desc->len = length;

	/* This bit specifies whether any data has been written by the device */
	if (length != 0) {
		desc->flags |= VRING_DESC_F_WRITE;
	}

	/* Buffer ID is included in the last descriptor in the list.
	 * The driver needs to keep track of the size of the list corresponding
	 * to each buffer ID.
	 */
	desc->id = buffer_id;

	/* A device MUST NOT make the descriptor used before buffer_id is
	 * written to the descriptor.
	 */
	spdk_smp_wmb();
	/* To mark a desc as used, the device sets the F_USED bit in flags to match
	 * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
	 * match the same value.
	 */
	if (virtqueue->packed.used_phase) {
		desc->flags |= VRING_DESC_F_AVAIL_USED;
	} else {
		desc->flags &= ~VRING_DESC_F_AVAIL_USED;
	}

	vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
	virtqueue->last_used_idx += num_descs;
	if (virtqueue->last_used_idx >= virtqueue->vring.size) {
		virtqueue->last_used_idx -= virtqueue->vring.size;
		virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
	}

	virtqueue->used_req_cnt++;
}

bool
vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
{
	uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;

	/* To mark a desc as available, the driver sets the F_AVAIL bit in flags
	 * to match the internal avail wrap counter. It also sets the F_USED bit to
	 * match the inverse value but it's not mandatory.
	 */
	return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
}

bool
vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}

int
vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
				 struct spdk_vhost_virtqueue *vq,
				 struct vring_packed_desc *desc_table,
				 uint32_t desc_table_size)
{
	if (desc_table != NULL) {
		/* A non-NULL desc_table means the descriptor is indirect: the next
		 * desc is found via req_idx and desc_table_size. A NULL result means
		 * we reached the last desc of this request.
		 */
		(*req_idx)++;
		if (*req_idx < desc_table_size) {
			*desc = &desc_table[*req_idx];
		} else {
			*desc = NULL;
		}
	} else {
		/* A NULL desc_table means the descriptor is non-indirect: the next
		 * desc is found via req_idx and the F_NEXT flag. A NULL result means
		 * we reached the last desc of this request. When a new desc is
		 * returned, req_idx is updated as well.
		 */
		if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
			*desc = NULL;
			return 0;
		}

		*req_idx = (*req_idx + 1) % vq->vring.size;
		*desc = &vq->vring.desc_packed[*req_idx];
	}

	return 0;
}

static int
vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
				uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
{
	uintptr_t vva;
	uint64_t len;

	do {
		if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
			SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
			return -1;
		}
		len = remaining;
		vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
		if (vva == 0 || len == 0) {
			SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
			return -1;
		}
		iov[*iov_index].iov_base = (void *)vva;
		iov[*iov_index].iov_len = len;
		remaining -= len;
		payload += len;
		(*iov_index)++;
	} while (remaining);

	return 0;
}

int
vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
			       uint16_t *iov_index, const struct vring_packed_desc *desc)
{
	return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
					       desc->addr, desc->len);
}

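/* Illustrative sketch (not part of the library): parsing one packed-ring
 * request with the helpers in this file. Error handling is omitted and the
 * surrounding poller context is assumed.
 *
 *	uint16_t buffer_id, num_descs, iovcnt = 0;
 *	struct iovec iovs[SPDK_VHOST_IOVS_MAX];
 *	struct vring_packed_desc *desc, *desc_table;
 *	uint32_t desc_table_size;
 *	uint16_t req_idx = vq->last_avail_idx;
 *
 *	if (!vhost_vq_packed_ring_is_avail(vq)) {
 *		return;
 *	}
 *	buffer_id = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
 *	vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
 *	while (desc != NULL) {
 *		vhost_vring_packed_desc_to_iov(vsession, iovs, &iovcnt, desc);
 *		vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
 *	}
 */
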
/* 1. Traverse the desc chain to get the buffer_id and return it as task_idx.
 * 2. Update vq->last_avail_idx to point to the next available desc chain.
 * 3. Toggle the avail_wrap_counter if last_avail_idx wraps around.
 */
uint16_t
vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
				      uint16_t *num_descs)
{
	struct vring_packed_desc *desc;
	uint16_t desc_head = req_idx;

	*num_descs = 1;

	desc = &vq->vring.desc_packed[req_idx];
	if (!vhost_vring_packed_desc_is_indirect(desc)) {
		while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
			req_idx = (req_idx + 1) % vq->vring.size;
			desc = &vq->vring.desc_packed[req_idx];
			(*num_descs)++;
		}
	}

	/* Queue Size doesn't have to be a power of 2.
	 * The device maintains last_avail_idx so we can make sure
	 * the value stays valid (0 ~ vring.size - 1).
	 */
	vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
	if (vq->last_avail_idx < desc_head) {
		vq->packed.avail_phase = !vq->packed.avail_phase;
	}

	return desc->id;
}

int
vhost_vring_desc_get_next(struct vring_desc **desc,
			  struct vring_desc *desc_table, uint32_t desc_table_size)
{
	struct vring_desc *old_desc = *desc;
	uint16_t next_idx;

	if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
		*desc = NULL;
		return 0;
	}

	next_idx = old_desc->next;
	if (spdk_unlikely(next_idx >= desc_table_size)) {
		*desc = NULL;
		return -1;
	}

	*desc = &desc_table[next_idx];
	return 0;
}

int
vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
			uint16_t *iov_index, const struct vring_desc *desc)
{
	return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
					       desc->addr, desc->len);
}

static struct spdk_vhost_session *
vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
{
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->id == id) {
			return vsession;
		}
	}

	return NULL;
}

struct spdk_vhost_session *
vhost_session_find_by_vid(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
			if (vsession->vid == vid) {
				return vsession;
			}
		}
	}

	return NULL;
}

struct spdk_vhost_dev *
spdk_vhost_dev_next(struct spdk_vhost_dev *vdev)
{
	if (vdev == NULL) {
		return TAILQ_FIRST(&g_vhost_devices);
	}

	return TAILQ_NEXT(vdev, tailq);
}

struct spdk_vhost_dev *
spdk_vhost_dev_find(const char *ctrlr_name)
{
	struct spdk_vhost_dev *vdev;
	size_t dev_dirname_len = strlen(dev_dirname);

	if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) {
		ctrlr_name += dev_dirname_len;
	}

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		if (strcmp(vdev->name, ctrlr_name) == 0) {
			return vdev;
		}
	}

	return NULL;
}

static int
vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc;

	if (cpumask == NULL) {
		return -1;
	}

	if (mask == NULL) {
		spdk_cpuset_copy(cpumask, spdk_app_get_core_mask());
		return 0;
	}

	rc = spdk_app_parse_core_mask(mask, cpumask);
	if (rc < 0) {
		SPDK_ERRLOG("invalid cpumask %s\n", mask);
		return -1;
	}

	if (spdk_cpuset_count(cpumask) == 0) {
		SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n",
			    spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -1;
	}

	return 0;
}

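/* Sent with spdk_thread_send_msg() to the controller's dedicated thread so it
 * can exit itself during device unregistration or a failed registration.
 */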
static void
vhost_dev_thread_exit(void *arg1)
{
	spdk_thread_exit(spdk_get_thread());
}

int
vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
		   const struct spdk_vhost_dev_backend *backend)
{
	char path[PATH_MAX];
	struct spdk_cpuset cpumask = {};
	int rc;

	assert(vdev);
	if (name == NULL) {
		SPDK_ERRLOG("Can't register controller with no name\n");
		return -EINVAL;
	}

	if (vhost_parse_core_mask(mask_str, &cpumask) != 0) {
		SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n",
			    mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask()));
		return -EINVAL;
	}

	if (spdk_vhost_dev_find(name)) {
		SPDK_ERRLOG("vhost controller %s already exists.\n", name);
		return -EEXIST;
	}

	if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) {
		SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname,
			    name);
		return -EINVAL;
	}

	vdev->name = strdup(name);
	vdev->path = strdup(path);
	if (vdev->name == NULL || vdev->path == NULL) {
		rc = -EIO;
		goto out;
	}

	vdev->thread = spdk_thread_create(vdev->name, &cpumask);
	if (vdev->thread == NULL) {
		SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
		rc = -EIO;
		goto out;
	}

	vdev->registered = true;
	vdev->backend = backend;
	TAILQ_INIT(&vdev->vsessions);

	vhost_dev_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
				 SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);

	if (vhost_register_unix_socket(path, name, vdev->virtio_features, vdev->disabled_features,
				       vdev->protocol_features)) {
		spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
		rc = -EIO;
		goto out;
	}

	TAILQ_INSERT_TAIL(&g_vhost_devices, vdev, tailq);

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name);
	return 0;

out:
	free(vdev->name);
	free(vdev->path);
	return rc;
}

int
vhost_dev_unregister(struct spdk_vhost_dev *vdev)
{
	if (!TAILQ_EMPTY(&vdev->vsessions)) {
		SPDK_ERRLOG("Controller %s still has valid connections.\n", vdev->name);
		return -EBUSY;
	}

	if (vdev->registered && vhost_driver_unregister(vdev->path) != 0) {
		SPDK_ERRLOG("Could not unregister controller %s with vhost library\n"
			    "Check if domain socket %s still exists\n",
			    vdev->name, vdev->path);
		return -EIO;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name);

	spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);

	free(vdev->name);
	free(vdev->path);
	TAILQ_REMOVE(&g_vhost_devices, vdev, tailq);
	return 0;
}

const char *
spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return vdev->name;
}

const struct spdk_cpuset *
spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev)
{
	assert(vdev != NULL);
	return spdk_thread_get_cpumask(vdev->thread);
}

static void
wait_for_semaphore(int timeout_sec, const char *errmsg)
{
	struct timespec timeout;
	int rc;

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;
	rc = sem_timedwait(&g_dpdk_sem, &timeout);
	if (rc != 0) {
		SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
		sem_wait(&g_dpdk_sem);
	}
}

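/* The *_done() helpers below are the asynchronous completions of start/stop
 * events. They record the result in g_dpdk_response and post g_dpdk_sem,
 * unblocking the DPDK thread that is waiting in wait_for_semaphore() above.
 */
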
static void
vhost_session_cb_done(int rc)
{
	g_dpdk_response = rc;
	sem_post(&g_dpdk_sem);
}

void
vhost_session_start_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->started = true;

		assert(vsession->vdev->active_session_num < UINT32_MAX);
		vsession->vdev->active_session_num++;
	}

	vhost_session_cb_done(response);
}

void
vhost_session_stop_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->started = false;

		assert(vsession->vdev->active_session_num > 0);
		vsession->vdev->active_session_num--;
	}

	vhost_session_cb_done(response);
}

static void
vhost_event_cb(void *arg1)
{
	struct vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_session *vsession;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), vhost_event_cb, arg1);
		return;
	}

	vsession = vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
	ctx->cb_fn(ctx->vdev, vsession, NULL);
	pthread_mutex_unlock(&g_vhost_mutex);
}

int
vhost_session_send_event(struct spdk_vhost_session *vsession,
			 spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
			 const char *errmsg)
{
	struct vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_vhost_dev *vdev = vsession->vdev;

	ev_ctx.vdev = vdev;
	ev_ctx.vsession_id = vsession->id;
	ev_ctx.cb_fn = cb_fn;

	spdk_thread_send_msg(vdev->thread, vhost_event_cb, &ev_ctx);

	pthread_mutex_unlock(&g_vhost_mutex);
	wait_for_semaphore(timeout_sec, errmsg);
	pthread_mutex_lock(&g_vhost_mutex);

	return g_dpdk_response;
}

static void
foreach_session_finish_cb(void *arg1)
{
	struct vhost_session_fn_ctx *ev_ctx = arg1;
	struct spdk_vhost_dev *vdev = ev_ctx->vdev;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(),
				     foreach_session_finish_cb, arg1);
		return;
	}

	assert(vdev->pending_async_op_num > 0);
	vdev->pending_async_op_num--;
	if (ev_ctx->cpl_fn != NULL) {
		ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
	}

	pthread_mutex_unlock(&g_vhost_mutex);
	free(ev_ctx);
}

static void
foreach_session(void *arg1)
{
	struct vhost_session_fn_ctx *ev_ctx = arg1;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev = ev_ctx->vdev;
	int rc;

	if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
		return;
	}

	TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
		if (vsession->initialized) {
			rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
			if (rc < 0) {
				goto out;
			}
		}
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);

	spdk_thread_send_msg(g_vhost_init_thread, foreach_session_finish_cb, arg1);
}

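/* Schedule fn() to run on vdev's thread for each currently initialized session
 * of the device, then run cpl_fn() (if provided) back on the init thread once
 * the iteration completes. The iteration stops early if fn() returns a
 * negative value.
 */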
void
vhost_dev_foreach_session(struct spdk_vhost_dev *vdev,
			  spdk_vhost_session_fn fn,
			  spdk_vhost_dev_fn cpl_fn,
			  void *arg)
{
	struct vhost_session_fn_ctx *ev_ctx;

	ev_ctx = calloc(1, sizeof(*ev_ctx));
	if (ev_ctx == NULL) {
		SPDK_ERRLOG("Failed to alloc vhost event.\n");
		assert(false);
		return;
	}

	ev_ctx->vdev = vdev;
	ev_ctx->cb_fn = fn;
	ev_ctx->cpl_fn = cpl_fn;
	ev_ctx->user_ctx = arg;

	assert(vdev->pending_async_op_num < UINT32_MAX);
	vdev->pending_async_op_num++;

	spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
}

static int
_stop_session(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_virtqueue *q;
	int rc;
	uint16_t i;

	rc = vdev->backend->stop_session(vsession);
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return rc;
	}

	for (i = 0; i < vsession->max_queues; i++) {
		q = &vsession->virtqueue[i];

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc == NULL) {
			continue;
		}

		/* Packed virtqueues support up to 2^15 entries each,
		 * so the leftmost bit can be used as the wrap counter.
		 */
		if (q->packed.packed_ring) {
			q->last_avail_idx = q->last_avail_idx |
					    ((uint16_t)q->packed.avail_phase << 15);
			q->last_used_idx = q->last_used_idx |
					   ((uint16_t)q->packed.used_phase << 15);
		}

		rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
	}

	vhost_session_mem_unregister(vsession->mem);
	free(vsession->mem);

	return 0;
}

int
vhost_stop_device_cb(int vid)
{
	struct spdk_vhost_session *vsession;
	int rc;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EINVAL;
	}

	if (!vsession->started) {
		/* already stopped, nothing to do */
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EALREADY;
	}

	rc = _stop_session(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);

	return rc;
}

int
vhost_start_device_cb(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;
	int rc = -1;
	uint16_t i;
	bool packed_ring;

	pthread_mutex_lock(&g_vhost_mutex);

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vsession->started) {
		/* already started, nothing to do */
		rc = 0;
		goto out;
	}

	if (vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
		goto out;
	}

	packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);

	vsession->max_queues = 0;
	memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue));
	for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) {
		struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];

		q->vring_idx = -1;
		if (rte_vhost_get_vhost_vring(vid, i, &q->vring)) {
			continue;
		}
		q->vring_idx = i;
		rte_vhost_get_vhost_ring_inflight(vid, i, &q->vring_inflight);

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc == NULL || q->vring.size == 0) {
			continue;
		}

		if (rte_vhost_get_vring_base(vsession->vid, i, &q->last_avail_idx, &q->last_used_idx)) {
			q->vring.desc = NULL;
			continue;
		}

		if (packed_ring) {
			/* Packed virtqueues support up to 2^15 entries each,
			 * so the leftmost bit can be used as the wrap counter.
			 */
			q->packed.avail_phase = q->last_avail_idx >> 15;
			q->last_avail_idx = q->last_avail_idx & 0x7FFF;
			q->packed.used_phase = q->last_used_idx >> 15;
			q->last_used_idx = q->last_used_idx & 0x7FFF;

			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
		} else {
			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
		}

		q->packed.packed_ring = packed_ring;
		vsession->max_queues = i + 1;
	}

	if (vhost_get_mem_table(vid, &vsession->mem) != 0) {
		SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
		goto out;
	}

	/*
	 * Not sure right now, but this looks like some kind of QEMU bug and guest IO
	 * might be frozen without kicking all queues after live-migration. It looks like
	 * the previous vhost instance failed to effectively deliver all interrupts before
	 * the GET_VRING_BASE message. This shouldn't harm the guest since spurious interrupts
	 * should be ignored by the guest virtio driver.
	 *
	 * Tested on QEMU 2.10.91 and 2.11.50.
	 */
	for (i = 0; i < vsession->max_queues; i++) {
		struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc != NULL && q->vring.size > 0) {
			rte_vhost_vring_call(vsession->vid, q->vring_idx);
		}
	}

	vhost_session_set_coalescing(vdev, vsession, NULL);
	vhost_session_mem_register(vsession->mem);
	vsession->initialized = true;
	rc = vdev->backend->start_session(vsession);
	if (rc != 0) {
		vhost_session_mem_unregister(vsession->mem);
		free(vsession->mem);
		goto out;
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}

#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
int
vhost_get_config_cb(int vid, uint8_t *config, uint32_t len)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_get_config) {
		rc = vdev->backend->vhost_get_config(vdev, config, len);
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}

int
vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_dev *vdev;
	int rc = -1;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		goto out;
	}

	vdev = vsession->vdev;
	if (vdev->backend->vhost_set_config) {
		rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags);
	}

out:
	pthread_mutex_unlock(&g_vhost_mutex);
	return rc;
}
#endif

int
spdk_vhost_set_socket_path(const char *basename)
{
	int ret;

	if (basename && strlen(basename) > 0) {
		ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename);
		if (ret <= 0) {
			return -EINVAL;
		}
		if ((size_t)ret >= sizeof(dev_dirname) - 2) {
			SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
			return -EINVAL;
		}

		if (dev_dirname[ret - 1] != '/') {
			dev_dirname[ret] = '/';
			dev_dirname[ret + 1] = '\0';
		}
	}

	return 0;
}

void
vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	assert(vdev->backend->dump_info_json != NULL);
	vdev->backend->dump_info_json(vdev, w);
}

int
spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev)
{
	if (vdev->pending_async_op_num) {
		return -EBUSY;
	}

	return vdev->backend->remove_device(vdev);
}

int
vhost_new_connection_cb(int vid, const char *ifname)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	pthread_mutex_lock(&g_vhost_mutex);

	vdev = spdk_vhost_dev_find(ifname);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -1;
	}

	/* We expect sessions inside vdev->vsessions to be sorted in ascending
	 * order with regard to vsession->id. For now we always set id = vsessions_cnt++
	 * and append each session to the very end of the vsessions list.
	 * This is required for spdk_vhost_dev_foreach_session() to work.
	 */
	if (vdev->vsessions_num == UINT_MAX) {
		assert(false);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EINVAL;
	}

	if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
			   vdev->backend->session_ctx_size)) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&g_vhost_mutex);
		return -1;
	}
	memset(vsession, 0, sizeof(*vsession) + vdev->backend->session_ctx_size);

	vsession->vdev = vdev;
	vsession->vid = vid;
	vsession->id = vdev->vsessions_num++;
	vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
	if (vsession->name == NULL) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&g_vhost_mutex);
		free(vsession);
		return -1;
	}
	vsession->started = false;
	vsession->initialized = false;
	vsession->next_stats_check_time = 0;
	vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
					 spdk_get_ticks_hz() / 1000UL;
	TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq);

	vhost_session_install_rte_compat_hooks(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);
	return 0;
}

int
vhost_destroy_connection_cb(int vid)
{
	struct spdk_vhost_session *vsession;
	int rc = 0;

	pthread_mutex_lock(&g_vhost_mutex);
	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		pthread_mutex_unlock(&g_vhost_mutex);
		return -EINVAL;
	}

	if (vsession->started) {
		rc = _stop_session(vsession);
	}

	TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq);
	free(vsession->name);
	free(vsession);
	pthread_mutex_unlock(&g_vhost_mutex);

	return rc;
}

void
spdk_vhost_lock(void)
{
	pthread_mutex_lock(&g_vhost_mutex);
}

int
spdk_vhost_trylock(void)
{
	return -pthread_mutex_trylock(&g_vhost_mutex);
}

void
spdk_vhost_unlock(void)
{
	pthread_mutex_unlock(&g_vhost_mutex);
}

void
spdk_vhost_init(spdk_vhost_init_cb init_cb)
{
	size_t len;
	int ret;

	g_vhost_init_thread = spdk_get_thread();
	assert(g_vhost_init_thread != NULL);

	if (dev_dirname[0] == '\0') {
		if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) {
			SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
			ret = -1;
			goto out;
		}

		len = strlen(dev_dirname);
		if (dev_dirname[len - 1] != '/') {
			dev_dirname[len] = '/';
			dev_dirname[len + 1] = '\0';
		}
	}

	ret = sem_init(&g_dpdk_sem, 0, 0);
	if (ret != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
		ret = -1;
		goto out;
	}

	ret = vhost_scsi_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost controllers\n");
		goto out;
	}

	ret = vhost_blk_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost block controllers\n");
		goto out;
	}

#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
	ret = vhost_nvme_controller_construct();
	if (ret != 0) {
		SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
		goto out;
	}
#endif

out:
	init_cb(ret);
}

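/* Shutdown path: spdk_vhost_fini() spawns session_shutdown() on a separate
 * pthread to synchronously unregister all rte_vhost sockets, and that thread
 * then sends _spdk_vhost_fini() back to the init thread to remove the
 * remaining devices and invoke the user's completion callback.
 */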
static void
_spdk_vhost_fini(void *arg1)
{
	struct spdk_vhost_dev *vdev, *tmp;

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		tmp = spdk_vhost_dev_next(vdev);
		spdk_vhost_dev_remove(vdev);
		/* don't care if it fails, there's nothing we can do for now */
		vdev = tmp;
	}
	spdk_vhost_unlock();

	/* All devices are removed now. */
	sem_destroy(&g_dpdk_sem);

	g_fini_cpl_cb();
}

static void *
session_shutdown(void *arg)
{
	struct spdk_vhost_dev *vdev = NULL;

	TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
		vhost_driver_unregister(vdev->path);
		vdev->registered = false;
	}

	SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n");
	spdk_thread_send_msg(g_vhost_init_thread, _spdk_vhost_fini, NULL);
	return NULL;
}

void
spdk_vhost_fini(spdk_vhost_fini_cb fini_cb)
{
	pthread_t tid;
	int rc;

	assert(spdk_get_thread() == g_vhost_init_thread);
	g_fini_cpl_cb = fini_cb;

	/* The rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
	 * ops for stopping a device or removing a connection, we need to call it from
	 * a separate thread to avoid deadlock.
	 */
	rc = pthread_create(&tid, NULL, &session_shutdown, NULL);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
		abort();
	}
	pthread_detach(tid);
}

void
spdk_vhost_config_json(struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_dev *vdev;
	uint32_t delay_base_us;
	uint32_t iops_threshold;

	spdk_json_write_array_begin(w);

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_next(NULL);
	while (vdev != NULL) {
		vdev->backend->write_config_json(vdev, w);

		spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold);
		if (delay_base_us) {
			spdk_json_write_object_begin(w);
			spdk_json_write_named_string(w, "method", "vhost_controller_set_coalescing");

			spdk_json_write_named_object_begin(w, "params");
			spdk_json_write_named_string(w, "ctrlr", vdev->name);
			spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us);
			spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold);
			spdk_json_write_object_end(w);

			spdk_json_write_object_end(w);
		}
		vdev = spdk_vhost_dev_next(vdev);
	}
	spdk_vhost_unlock();

	spdk_json_write_array_end(w);
}

SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST)
SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING)