/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2019 Intel Corporation
 */

#include <stdint.h>
#include <unistd.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <semaphore.h>
#include <linux/virtio_blk.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_blk.h"
#include "blk_spec.h"

#define VIRTQ_DESC_F_NEXT	1
#define VIRTQ_DESC_F_AVAIL	(1 << 7)
#define VIRTQ_DESC_F_USED	(1 << 15)

#define MAX_TASK		12

#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
			    (1ULL << VIRTIO_F_VERSION_1) | \
			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))

/* Path to folder where character device will be created. Can be set by user. */
static char dev_pathname[PATH_MAX] = "";
static sem_t exit_sem;
static int g_should_stop = -1;

struct vhost_blk_ctrlr *
vhost_blk_ctrlr_find(const char *ctrlr_name)
{
	if (ctrlr_name == NULL)
		return NULL;

	/* currently we only support 1 socket file fd */
	return g_vhost_ctrlr;
}

static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	int ret = 0;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		/* abort: translation is impossible without the socket name */
		assert(ret == 0);
	}

	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		assert(ctrlr != NULL);
	}

	assert(ctrlr->mem != NULL);

	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}

static struct vring_packed_desc *
descriptor_get_next_packed(struct rte_vhost_vring *vq,
			   uint16_t *idx)
{
	if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) {
		*idx += 1;
		return &vq->desc_packed[*idx % vq->size];
	}

	return NULL;
}

static bool
descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

static struct rte_vhost_inflight_desc_packed *
inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed,
		       struct rte_vhost_inflight_desc_packed *cur_desc)
{
	if (!!(cur_desc->flags & VIRTQ_DESC_F_NEXT))
		return &inflight_packed->desc[cur_desc->next];

	return NULL;
}

static bool
inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

static void
inflight_process_payload_chain_packed(struct inflight_blk_task *task)
{
	void *data;
	uint64_t chunck_len;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_desc_packed *desc;

	blk_task = &task->blk_task;
	blk_task->iovs_cnt = 0;

	do {
		desc = task->inflight_desc;
		chunck_len = desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid,
						     desc->addr,
						     &chunck_len);
		if (!data || chunck_len != desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		blk_task->iovs[blk_task->iovs_cnt].iov_base = data;
		blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len;
		blk_task->data_len += desc->len;
		blk_task->iovs_cnt++;
		task->inflight_desc = inflight_desc_get_next(
					task->inflight_packed, desc);
	} while (inflight_desc_has_next(task->inflight_desc));

	chunck_len = task->inflight_desc->len;
	blk_task->status = (void *)(uintptr_t)gpa_to_vva(
		blk_task->bdev->vid, task->inflight_desc->addr, &chunck_len);
	if (!blk_task->status || chunck_len != task->inflight_desc->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

static void
inflight_submit_completion_packed(struct inflight_blk_task *task,
				  uint32_t q_idx, uint16_t *used_id,
				  bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->blk_task.vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->blk_task.buffer_id;
	rte_smp_mb();
	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
	rte_smp_mb();

	*used_id += task->blk_task.iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->blk_task.head_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx);
}

static void
submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx,
			 uint16_t *used_id, bool *used_wrap_counter)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct rte_vhost_vring *vq;
	struct vring_packed_desc *desc;
	int ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	vq = task->vq;

	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
						    task->inflight_idx);
	if (ret != 0)
		fprintf(stderr, "failed to set last inflight io\n");

	desc = &vq->desc_packed[*used_id];
	desc->id = task->buffer_id;
	rte_smp_mb();
	if (*used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
	rte_smp_mb();

	*used_id += task->iovs_cnt + 2;
	if (*used_id >= vq->size) {
		*used_id -= vq->size;
		*used_wrap_counter = !(*used_wrap_counter);
	}

	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						 task->inflight_idx);
	if (ret != 0)
		fprintf(stderr, "failed to clear inflight io\n");

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}

static void
vhost_process_payload_chain_packed(struct vhost_blk_task *task,
				   uint16_t *idx)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc_packed->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len;
		task->data_len += task->desc_packed->len;
		task->iovs_cnt++;
		task->desc_packed = descriptor_get_next_packed(task->vq, idx);
	} while (descriptor_has_next_packed(task->desc_packed));

	task->last_idx = *idx % task->vq->size;
	chunck_len = task->desc_packed->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_packed->addr,
						     &chunck_len);
	if (!task->status || chunck_len != task->desc_packed->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

static int
descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx,
			bool avail_wrap_counter)
{
	uint16_t flags = vring->desc_packed[idx].flags;

	return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) &&
		(!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter));
}

static void
process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	bool avail_wrap_counter, used_wrap_counter;
	uint16_t avail_idx, used_idx;
	int ret;
	uint64_t chunck_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	avail_idx = blk_vq->last_avail_idx;
	avail_wrap_counter = blk_vq->avail_wrap_counter;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->vq = vq;
	task->bdev = ctrlr->bdev;

	while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) {
		task->head_idx = avail_idx;
		task->desc_packed = &task->vq->desc_packed[task->head_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_packed->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
			task->desc_packed->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_packed->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_packed = descriptor_get_next_packed(task->vq,
							       &avail_idx);
		assert(task->desc_packed != NULL);
		if (!descriptor_has_next_packed(task->desc_packed)) {
			task->dxfer_dir = BLK_DIR_NONE;
			task->last_idx = avail_idx % vq->size;
			chunck_len = task->desc_packed->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_packed->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_packed->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_packed(task->desc_packed);
			vhost_process_payload_chain_packed(task, &avail_idx);
		}
		task->buffer_id = vq->desc_packed[task->last_idx].id;
		rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
						   task->head_idx,
						   task->last_idx,
						   &task->inflight_idx);

		if (++avail_idx >= vq->size) {
			avail_idx -= vq->size;
			avail_wrap_counter = !avail_wrap_counter;
		}
		blk_vq->last_avail_idx = avail_idx;
		blk_vq->avail_wrap_counter = avail_wrap_counter;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			/* successful */
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_packed(task, q_idx, &used_idx,
					 &used_wrap_counter);
		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}

static void
submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr,
			  uint16_t q_idx)
{
	bool used_wrap_counter;
	int req_idx, ret;
	uint16_t used_idx;
	uint64_t chunck_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_info;
	struct rte_vhost_vring *vq;
	struct inflight_blk_task *task;
	struct vhost_blk_task *blk_task;
	struct rte_vhost_inflight_info_packed *inflight_info;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_info = inflight_vq->resubmit_inflight;
	inflight_info = inflight_vq->inflight_packed;
	used_idx = blk_vq->last_used_idx;
	used_wrap_counter = blk_vq->used_wrap_counter;

	task = rte_malloc(NULL, sizeof(*task), 0);
	if (!task) {
		fprintf(stderr, "failed to allocate memory\n");
		return;
	}
	blk_task = &task->blk_task;
	blk_task->vq = vq;
	blk_task->bdev = ctrlr->bdev;
	task->inflight_packed = inflight_vq->inflight_packed;

	while (resubmit_info->resubmit_num-- > 0) {
		req_idx = resubmit_info->resubmit_num;
		blk_task->head_idx =
			resubmit_info->resubmit_list[req_idx].index;
		task->inflight_desc =
			&inflight_info->desc[blk_task->head_idx];
		task->blk_task.iovs_cnt = 0;
		task->blk_task.data_len = 0;
		task->blk_task.req = NULL;
		task->blk_task.status = NULL;

		/* update the avail idx too,
		 * as its initial value equals the used idx
		 */
		blk_vq->last_avail_idx += task->inflight_desc->num;
		if (blk_vq->last_avail_idx >= vq->size) {
			blk_vq->last_avail_idx -= vq->size;
			blk_vq->avail_wrap_counter =
				!blk_vq->avail_wrap_counter;
		}

		/* does not support indirect descriptors */
		assert(task->inflight_desc != NULL);
		assert((task->inflight_desc->flags &
			VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->inflight_desc->len;
		blk_task->req = (void *)(uintptr_t)
				gpa_to_vva(blk_task->bdev->vid,
					   task->inflight_desc->addr,
					   &chunck_len);
		if (!blk_task->req ||
		    chunck_len != task->inflight_desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->inflight_desc = inflight_desc_get_next(
			task->inflight_packed, task->inflight_desc);
		assert(task->inflight_desc != NULL);
		if (!inflight_desc_has_next(task->inflight_desc)) {
			blk_task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->inflight_desc->len;
			blk_task->status = (void *)(uintptr_t)
					gpa_to_vva(blk_task->bdev->vid,
						   task->inflight_desc->addr,
						   &chunck_len);
			if (!blk_task->status ||
			    chunck_len != task->inflight_desc->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			blk_task->readtype =
				inflight_desc_is_wr(task->inflight_desc);
			inflight_process_payload_chain_packed(task);
		}

		blk_task->buffer_id = task->inflight_desc->id;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task);
		if (ret)
			/* invalid response */
			*blk_task->status = VIRTIO_BLK_S_IOERR;
		else
			/* successful */
			*blk_task->status = VIRTIO_BLK_S_OK;

		inflight_submit_completion_packed(task, q_idx, &used_idx,
						  &used_wrap_counter);

		blk_vq->last_used_idx = used_idx;
		blk_vq->used_wrap_counter = used_wrap_counter;
	}

	rte_free(task);
}

static struct vring_desc *
descriptor_get_next_split(struct vring_desc *vq_desc,
			  struct vring_desc *cur_desc)
{
	return &vq_desc[cur_desc->next];
}

static bool
descriptor_has_next_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

static void
vhost_process_payload_chain_split(struct vhost_blk_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc_split->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc_split->len;
		task->data_len += task->desc_split->len;
		task->iovs_cnt++;
		task->desc_split =
			descriptor_get_next_split(task->vq->desc,
						  task->desc_split);
	} while (descriptor_has_next_split(task->desc_split));

	chunck_len = task->desc_split->len;
	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc_split->addr,
						     &chunck_len);
	if (!task->status || chunck_len != task->desc_split->len)
		fprintf(stderr, "failed to translate desc address.\n");
}

static void
submit_completion_split(struct vhost_blk_task *task, uint32_t vid,
			uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;

	rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx);

	/* Fill out the next entry in the "used" ring. id = the
	 * index of the descriptor that contained the blk request.
	 * len = the total amount of data transferred for the blk
	 * request. We must report the correct len, for variable
	 * length blk CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	rte_smp_mb();
	used->idx++;
	rte_smp_mb();

	rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx, task->req_idx);

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}

static void
submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr,
			 uint32_t q_idx)
{
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_resubmit_info *resubmit_inflight;
	struct rte_vhost_resubmit_desc *resubmit_list;
	struct vhost_blk_task *task;
	int req_idx;
	uint64_t chunck_len;
	int ret;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	inflight_vq = &blk_vq->inflight_vq;
	resubmit_inflight = inflight_vq->resubmit_inflight;
	resubmit_list = resubmit_inflight->resubmit_list;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);

	task->ctrlr = ctrlr;
	task->bdev = ctrlr->bdev;
	task->vq = &blk_vq->vq;

	while (resubmit_inflight->resubmit_num-- > 0) {
		req_idx = resubmit_list[resubmit_inflight->resubmit_num].index;
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		/* does not support indirect descriptors */
		assert(task->desc_split != NULL);
		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
			task->desc_split->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			/* successful */
			*task->status = VIRTIO_BLK_S_OK;
		}
		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}

static void
process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
{
	int ret;
	int req_idx;
	uint16_t last_idx;
	uint64_t chunck_len;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	struct vhost_blk_task *task;

	blk_vq = &ctrlr->bdev->queues[q_idx];
	vq = &blk_vq->vq;

	task = rte_zmalloc(NULL, sizeof(*task), 0);
	assert(task != NULL);
	task->ctrlr = ctrlr;
	task->bdev = ctrlr->bdev;
	task->vq = vq;

	while (vq->avail->idx != blk_vq->last_avail_idx) {
		last_idx = blk_vq->last_avail_idx & (vq->size - 1);
		req_idx = vq->avail->ring[last_idx];
		task->req_idx = req_idx;
		task->desc_split = &task->vq->desc[task->req_idx];
		task->iovs_cnt = 0;
		task->data_len = 0;
		task->req = NULL;
		task->status = NULL;

		rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx,
						  task->req_idx);

		/* does not support indirect descriptors */
		assert((task->desc_split->flags &
			VRING_DESC_F_INDIRECT) == 0);

		chunck_len = task->desc_split->len;
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
			task->desc_split->addr, &chunck_len);
		if (!task->req || chunck_len != task->desc_split->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			rte_free(task);
			return;
		}

		task->desc_split = descriptor_get_next_split(task->vq->desc,
							     task->desc_split);
		if (!descriptor_has_next_split(task->desc_split)) {
			task->dxfer_dir = BLK_DIR_NONE;
			chunck_len = task->desc_split->len;
			task->status = (void *)(uintptr_t)
				       gpa_to_vva(task->bdev->vid,
						  task->desc_split->addr,
						  &chunck_len);
			if (!task->status ||
			    chunck_len != task->desc_split->len) {
				fprintf(stderr,
					"failed to translate desc address.\n");
				rte_free(task);
				return;
			}
		} else {
			task->readtype =
				descriptor_is_wr_split(task->desc_split);
			vhost_process_payload_chain_split(task);
		}
		blk_vq->last_avail_idx++;

		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			*task->status = VIRTIO_BLK_S_IOERR;
		} else {
			/* successful */
			*task->status = VIRTIO_BLK_S_OK;
		}

		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
	}

	rte_free(task);
}

static void *
ctrlr_worker(void *arg)
{
	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_ring_inflight *inflight_vq;
	cpu_set_t cpuset;
	pthread_t thread;
	int i;

	fprintf(stdout, "Ctrlr Worker Thread start\n");

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		fprintf(stderr,
			"%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(0);
	}

	thread = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		inflight_vq = &blk_vq->inflight_vq;
		if (inflight_vq->resubmit_inflight != NULL &&
		    inflight_vq->resubmit_inflight->resubmit_num != 0) {
			if (ctrlr->packed_ring)
				submit_inflight_vq_packed(ctrlr, i);
			else
				submit_inflight_vq_split(ctrlr, i);
		}
	}

	while (!g_should_stop && ctrlr->bdev != NULL) {
		for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
			if (ctrlr->packed_ring)
				process_requestq_packed(ctrlr, i);
			else
				process_requestq_split(ctrlr, i);
		}
	}

	g_should_stop = 2;
	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
	sem_post(&exit_sem);
	return NULL;
}

static int
new_device(int vid)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	struct rte_vhost_vring *vq;
	uint64_t features;
	pthread_t tid;
	int i, ret;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	if (ctrlr->started)
		return 0;

	ctrlr->bdev->vid = vid;
	ret = rte_vhost_get_negotiated_features(vid, &features);
	if (ret) {
		fprintf(stderr, "failed to get the negotiated features\n");
		return -1;
	}
	ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));

	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret)
		fprintf(stderr, "Get Controller memory region failed\n");
	assert(ctrlr->mem != NULL);

	/* Disable Notifications and init last idx */
	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		vq = &blk_vq->vq;

		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);

		ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i,
					       &blk_vq->last_avail_idx,
					       &blk_vq->last_used_idx);
		assert(ret == 0);

		ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i,
							&blk_vq->inflight_vq);
		assert(ret == 0);

		if (ctrlr->packed_ring) {
			/* for the reconnection */
			ret = rte_vhost_get_vring_base_from_inflight(
				ctrlr->bdev->vid, i,
				&blk_vq->last_avail_idx,
				&blk_vq->last_used_idx);
			assert(ret == 0);

			/* bit 15 of each recovered index carries the wrap
			 * counter; keep only the 15-bit ring index
			 */
			blk_vq->avail_wrap_counter = blk_vq->last_avail_idx &
				(1 << 15);
			blk_vq->last_avail_idx = blk_vq->last_avail_idx &
				0x7fff;
			blk_vq->used_wrap_counter = blk_vq->last_used_idx &
				(1 << 15);
			blk_vq->last_used_idx = blk_vq->last_used_idx &
				0x7fff;
		}

		rte_vhost_enable_guest_notification(vid, i, 0);
	}

	/* start polling vring */
	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid);
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Worker Thread Start Failed\n");
		return -1;
	}

	/* device has been started */
	ctrlr->started = 1;
	pthread_detach(tid);
	return 0;
}

static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *blk_vq;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	if (!ctrlr->started)
		return;

	g_should_stop = 1;
	while (g_should_stop != 2)
		;

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		blk_vq = &ctrlr->bdev->queues[i];
		if (ctrlr->packed_ring) {
			blk_vq->last_avail_idx |= (blk_vq->avail_wrap_counter <<
				15);
			blk_vq->last_used_idx |= (blk_vq->used_wrap_counter <<
				15);
		}
		rte_vhost_set_vring_base(ctrlr->bdev->vid, i,
					 blk_vq->last_avail_idx,
					 blk_vq->last_used_idx);
	}

	free(ctrlr->mem);

	ctrlr->started = 0;
	sem_wait(&exit_sem);
}

static int
new_connection(int vid)
{
	/* extend the proper features for block device */
	vhost_session_install_rte_compat_hooks(vid);

	return 0;
}

struct vhost_device_ops vhost_blk_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.new_connection = new_connection,
};

static struct vhost_block_dev *
vhost_blk_bdev_construct(const char *bdev_name,
			 const char *bdev_serial, uint32_t blk_size,
			 uint64_t blk_cnt, bool wce_enable)
{
	struct vhost_block_dev *bdev;

	bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
	if (!bdev)
		return NULL;

	strncpy(bdev->name, bdev_name, sizeof(bdev->name));
	strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
	bdev->blocklen = blk_size;
	bdev->blockcnt = blk_cnt;
	bdev->write_cache = wce_enable;

	fprintf(stdout, "blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
		bdev->blockcnt);

	/* use memory as disk storage space */
	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
	if (!bdev->data) {
		fprintf(stderr, "not enough reserved huge memory for disk\n");
		rte_free(bdev);
		return NULL;
	}

	return bdev;
}

static struct vhost_blk_ctrlr *
vhost_blk_ctrlr_construct(const char *ctrlr_name)
{
	int ret;
	struct vhost_blk_ctrlr *ctrlr;
	char *path;
	char cwd[PATH_MAX];

	/* always use current directory */
	path = getcwd(cwd, PATH_MAX);
	if (!path) {
		fprintf(stderr, "Cannot get current working directory\n");
		return NULL;
	}
	snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

	unlink(dev_pathname);

	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
		fprintf(stderr, "socket %s already exists\n", dev_pathname);
		return NULL;
	}

	ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
	if (ret != 0) {
		fprintf(stderr, "Set vhost driver features failed\n");
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* set proper features */
	vhost_dev_install_rte_compat_hooks(dev_pathname);

	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
	if (!ctrlr) {
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
					       4096, 32768, 0);
	if (!ctrlr->bdev) {
		rte_free(ctrlr);
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	rte_vhost_driver_callback_register(dev_pathname,
					   &vhost_blk_device_ops);

	return ctrlr;
}

static void
signal_handler(__rte_unused int signum)
{
	struct vhost_blk_ctrlr *ctrlr;

	unlink(dev_pathname);

	if (g_should_stop != -1) {
		g_should_stop = 1;
		while (g_should_stop != 2)
			;
	}

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (ctrlr != NULL) {
		if (ctrlr->bdev != NULL) {
			rte_free(ctrlr->bdev->data);
			rte_free(ctrlr->bdev);
		}
		rte_free(ctrlr);
	}

	rte_vhost_driver_unregister(dev_pathname);
	exit(0);
}

int main(int argc, char *argv[])
{
	int ret;

	signal(SIGINT, signal_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket");
	if (g_vhost_ctrlr == NULL) {
		fprintf(stderr, "Construct vhost blk controller failed\n");
		return 0;
	}

	if (sem_init(&exit_sem, 0, 0) < 0) {
		fprintf(stderr, "Error init exit_sem\n");
		return -1;
	}

	rte_vhost_driver_start(dev_pathname);

	/* loop until the application is terminated by the signal handler */
	while (1)
		sleep(1);

	return 0;
}