1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/thread.h" 40 #include "spdk/likely.h" 41 #include "spdk/string.h" 42 #include "spdk/util.h" 43 #include "spdk/vhost.h" 44 45 #include "vhost_internal.h" 46 47 struct spdk_vhost_blk_task { 48 struct spdk_bdev_io *bdev_io; 49 struct spdk_vhost_blk_dev *bvdev; 50 struct spdk_vhost_virtqueue *vq; 51 52 volatile uint8_t *status; 53 54 uint16_t req_idx; 55 56 /* for io wait */ 57 struct spdk_bdev_io_wait_entry bdev_io_wait; 58 59 /* If set, the task is currently used for I/O processing. */ 60 bool used; 61 62 /** Number of bytes that were written. */ 63 uint32_t used_len; 64 uint16_t iovcnt; 65 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 66 }; 67 68 struct spdk_vhost_blk_dev { 69 struct spdk_vhost_dev vdev; 70 struct spdk_bdev *bdev; 71 struct spdk_bdev_desc *bdev_desc; 72 struct spdk_io_channel *bdev_io_channel; 73 struct spdk_poller *requestq_poller; 74 struct spdk_vhost_dev_destroy_ctx destroy_ctx; 75 bool readonly; 76 }; 77 78 /* forward declaration */ 79 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 80 81 static int 82 process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, 83 struct spdk_vhost_virtqueue *vq); 84 85 static void 86 blk_task_finish(struct spdk_vhost_blk_task *task) 87 { 88 assert(task->bvdev->vdev.task_cnt > 0); 89 task->bvdev->vdev.task_cnt--; 90 task->used = false; 91 } 92 93 static void 94 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 95 { 96 if (task->status) { 97 *task->status = status; 98 } 99 100 spdk_vhost_vq_used_ring_enqueue(&task->bvdev->vdev, task->vq, task->req_idx, 101 task->used_len); 102 blk_task_finish(task); 103 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 104 } 105 106 /* 107 * Process task's descriptor chain and setup data related fields. 108 * Return 109 * total size of suplied buffers 110 * 111 * FIXME: Make this function return to rd_cnt and wr_cnt 112 */ 113 static int 114 blk_iovs_setup(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq, uint16_t req_idx, 115 struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 116 { 117 struct vring_desc *desc, *desc_table; 118 uint16_t out_cnt = 0, cnt = 0; 119 uint32_t desc_table_size, len = 0; 120 int rc; 121 122 rc = spdk_vhost_vq_get_desc(vdev, vq, req_idx, &desc, &desc_table, &desc_table_size); 123 if (rc != 0) { 124 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 125 return -1; 126 } 127 128 while (1) { 129 /* 130 * Maximum cnt reached? 131 * Should not happen if request is well formatted, otherwise this is a BUG. 132 */ 133 if (spdk_unlikely(cnt == *iovs_cnt)) { 134 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Max IOVs in request reached (req_idx = %"PRIu16").\n", 135 req_idx); 136 return -1; 137 } 138 139 if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &cnt, desc))) { 140 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 141 req_idx, cnt); 142 return -1; 143 } 144 145 len += desc->len; 146 147 out_cnt += spdk_vhost_vring_desc_is_wr(desc); 148 149 rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 150 if (rc != 0) { 151 SPDK_ERRLOG("%s: Descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 152 vdev->name, req_idx); 153 return -1; 154 } else if (desc == NULL) { 155 break; 156 } 157 } 158 159 /* 160 * There must be least two descriptors. 161 * First contain request so it must be readable. 162 * Last descriptor contain buffer for response so it must be writable. 163 */ 164 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 165 return -1; 166 } 167 168 *length = len; 169 *iovs_cnt = cnt; 170 return 0; 171 } 172 173 static void 174 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 175 { 176 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 177 spdk_vhost_vq_used_ring_enqueue(&task->bvdev->vdev, task->vq, task->req_idx, 178 task->used_len); 179 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 180 task->req_idx, success ? "OK" : "FAIL"); 181 blk_task_finish(task); 182 } 183 184 static void 185 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 186 { 187 struct spdk_vhost_blk_task *task = cb_arg; 188 189 spdk_bdev_free_io(bdev_io); 190 blk_request_finish(success, task); 191 } 192 193 static void 194 blk_request_resubmit(void *arg) 195 { 196 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 197 int rc = 0; 198 199 rc = process_blk_request(task, task->bvdev, task->vq); 200 if (rc == 0) { 201 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 202 } else { 203 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 204 } 205 } 206 207 static inline void 208 blk_request_queue_io(struct spdk_vhost_blk_task *task) 209 { 210 int rc; 211 struct spdk_vhost_blk_dev *bvdev = task->bvdev; 212 struct spdk_bdev *bdev = bvdev->bdev; 213 214 task->bdev_io_wait.bdev = bdev; 215 task->bdev_io_wait.cb_fn = blk_request_resubmit; 216 task->bdev_io_wait.cb_arg = task; 217 218 rc = spdk_bdev_queue_io_wait(bdev, bvdev->bdev_io_channel, &task->bdev_io_wait); 219 if (rc != 0) { 220 SPDK_ERRLOG("Queue io failed in vhost_blk, rc=%d\n", rc); 221 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 222 } 223 } 224 225 static int 226 process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, 227 struct spdk_vhost_virtqueue *vq) 228 { 229 const struct virtio_blk_outhdr *req; 230 struct iovec *iov; 231 uint32_t type; 232 uint32_t payload_len; 233 int rc; 234 235 if (blk_iovs_setup(&bvdev->vdev, vq, task->req_idx, task->iovs, &task->iovcnt, &payload_len)) { 236 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 237 /* Only READ and WRITE are supported for now. */ 238 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 239 return -1; 240 } 241 242 iov = &task->iovs[0]; 243 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 244 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 245 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 246 iov->iov_len, sizeof(*req), task->req_idx); 247 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 248 return -1; 249 } 250 251 req = iov->iov_base; 252 253 iov = &task->iovs[task->iovcnt - 1]; 254 if (spdk_unlikely(iov->iov_len != 1)) { 255 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 256 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 257 iov->iov_len, 1, task->req_idx); 258 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 259 return -1; 260 } 261 262 task->status = iov->iov_base; 263 payload_len -= sizeof(*req) + sizeof(*task->status); 264 task->iovcnt -= 2; 265 266 type = req->type; 267 #ifdef VIRTIO_BLK_T_BARRIER 268 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 269 type &= ~VIRTIO_BLK_T_BARRIER; 270 #endif 271 272 switch (type) { 273 case VIRTIO_BLK_T_IN: 274 case VIRTIO_BLK_T_OUT: 275 if (spdk_unlikely((payload_len & (512 - 1)) != 0)) { 276 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 277 type ? "WRITE" : "READ", task->req_idx); 278 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 279 return -1; 280 } 281 282 if (type == VIRTIO_BLK_T_IN) { 283 task->used_len = payload_len + sizeof(*task->status); 284 rc = spdk_bdev_readv(bvdev->bdev_desc, bvdev->bdev_io_channel, 285 &task->iovs[1], task->iovcnt, req->sector * 512, 286 payload_len, blk_request_complete_cb, task); 287 } else if (!bvdev->readonly) { 288 task->used_len = sizeof(*task->status); 289 rc = spdk_bdev_writev(bvdev->bdev_desc, bvdev->bdev_io_channel, 290 &task->iovs[1], task->iovcnt, req->sector * 512, 291 payload_len, blk_request_complete_cb, task); 292 } else { 293 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 294 rc = -1; 295 } 296 297 if (rc) { 298 if (rc == -ENOMEM) { 299 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 300 blk_request_queue_io(task); 301 } else { 302 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 303 return -1; 304 } 305 } 306 break; 307 case VIRTIO_BLK_T_GET_ID: 308 if (!task->iovcnt || !payload_len) { 309 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 310 return -1; 311 } 312 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 313 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 314 task->used_len, ' '); 315 blk_request_finish(true, task); 316 break; 317 default: 318 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 319 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 320 return -1; 321 } 322 323 return 0; 324 } 325 326 static void 327 process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) 328 { 329 struct spdk_vhost_blk_task *task; 330 int rc; 331 uint16_t reqs[32]; 332 uint16_t reqs_cnt, i; 333 334 reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 335 if (!reqs_cnt) { 336 return; 337 } 338 339 for (i = 0; i < reqs_cnt; i++) { 340 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 341 reqs[i]); 342 343 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 344 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 345 bvdev->vdev.name, reqs[i], vq->vring.size); 346 spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, reqs[i], 0); 347 continue; 348 } 349 350 task = &((struct spdk_vhost_blk_task *)vq->tasks)[reqs[i]]; 351 if (spdk_unlikely(task->used)) { 352 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 353 bvdev->vdev.name, reqs[i]); 354 spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, reqs[i], 0); 355 continue; 356 } 357 358 bvdev->vdev.task_cnt++; 359 360 task->used = true; 361 task->iovcnt = SPDK_COUNTOF(task->iovs); 362 task->status = NULL; 363 task->used_len = 0; 364 365 rc = process_blk_request(task, bvdev, vq); 366 if (rc == 0) { 367 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 368 reqs[i]); 369 } else { 370 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, reqs[i]); 371 } 372 } 373 } 374 375 static int 376 vdev_worker(void *arg) 377 { 378 struct spdk_vhost_blk_dev *bvdev = arg; 379 uint16_t q_idx; 380 381 for (q_idx = 0; q_idx < bvdev->vdev.max_queues; q_idx++) { 382 process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]); 383 } 384 385 spdk_vhost_dev_used_signal(&bvdev->vdev); 386 387 return -1; 388 } 389 390 static void 391 no_bdev_process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) 392 { 393 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 394 uint32_t length; 395 uint16_t iovcnt, req_idx; 396 397 if (spdk_vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 398 return; 399 } 400 401 iovcnt = SPDK_COUNTOF(iovs); 402 if (blk_iovs_setup(&bvdev->vdev, vq, req_idx, iovs, &iovcnt, &length) == 0) { 403 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 404 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 405 } 406 407 spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, req_idx, 0); 408 } 409 410 static int 411 no_bdev_vdev_worker(void *arg) 412 { 413 struct spdk_vhost_blk_dev *bvdev = arg; 414 uint16_t q_idx; 415 416 for (q_idx = 0; q_idx < bvdev->vdev.max_queues; q_idx++) { 417 no_bdev_process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]); 418 } 419 420 spdk_vhost_dev_used_signal(&bvdev->vdev); 421 422 if (bvdev->vdev.task_cnt == 0 && bvdev->bdev_io_channel) { 423 spdk_put_io_channel(bvdev->bdev_io_channel); 424 bvdev->bdev_io_channel = NULL; 425 } 426 427 return -1; 428 } 429 430 static struct spdk_vhost_blk_dev * 431 to_blk_dev(struct spdk_vhost_dev *vdev) 432 { 433 if (vdev == NULL) { 434 return NULL; 435 } 436 437 if (vdev->backend != &vhost_blk_device_backend) { 438 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 439 return NULL; 440 } 441 442 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 443 } 444 445 struct spdk_bdev * 446 spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) 447 { 448 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 449 450 assert(bvdev != NULL); 451 return bvdev->bdev; 452 } 453 454 static int 455 _bdev_remove_cb(struct spdk_vhost_dev *vdev, void *arg) 456 { 457 struct spdk_vhost_blk_dev *bvdev = arg; 458 459 SPDK_WARNLOG("Controller %s: Hot-removing bdev - all further requests will fail.\n", 460 bvdev->vdev.name); 461 if (bvdev->requestq_poller) { 462 spdk_poller_unregister(&bvdev->requestq_poller); 463 bvdev->requestq_poller = spdk_poller_register(no_bdev_vdev_worker, bvdev, 0); 464 } 465 466 spdk_bdev_close(bvdev->bdev_desc); 467 bvdev->bdev_desc = NULL; 468 bvdev->bdev = NULL; 469 return 0; 470 } 471 472 static void 473 bdev_remove_cb(void *remove_ctx) 474 { 475 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 476 477 spdk_vhost_call_external_event(bvdev->vdev.name, _bdev_remove_cb, bvdev); 478 } 479 480 static void 481 free_task_pool(struct spdk_vhost_blk_dev *bvdev) 482 { 483 struct spdk_vhost_virtqueue *vq; 484 uint16_t i; 485 486 for (i = 0; i < bvdev->vdev.max_queues; i++) { 487 vq = &bvdev->vdev.virtqueue[i]; 488 if (vq->tasks == NULL) { 489 continue; 490 } 491 492 spdk_dma_free(vq->tasks); 493 vq->tasks = NULL; 494 } 495 } 496 497 static int 498 alloc_task_pool(struct spdk_vhost_blk_dev *bvdev) 499 { 500 struct spdk_vhost_virtqueue *vq; 501 struct spdk_vhost_blk_task *task; 502 uint32_t task_cnt; 503 uint16_t i; 504 uint32_t j; 505 506 for (i = 0; i < bvdev->vdev.max_queues; i++) { 507 vq = &bvdev->vdev.virtqueue[i]; 508 if (vq->vring.desc == NULL) { 509 continue; 510 } 511 512 task_cnt = vq->vring.size; 513 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 514 /* sanity check */ 515 SPDK_ERRLOG("Controller %s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 516 bvdev->vdev.name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 517 free_task_pool(bvdev); 518 return -1; 519 } 520 vq->tasks = spdk_dma_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 521 SPDK_CACHE_LINE_SIZE, NULL); 522 if (vq->tasks == NULL) { 523 SPDK_ERRLOG("Controller %s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 524 bvdev->vdev.name, task_cnt, i); 525 free_task_pool(bvdev); 526 return -1; 527 } 528 529 for (j = 0; j < task_cnt; j++) { 530 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 531 task->bvdev = bvdev; 532 task->req_idx = j; 533 task->vq = vq; 534 } 535 } 536 537 return 0; 538 } 539 540 /* 541 * A new device is added to a data core. First the device is added to the main linked list 542 * and then allocated to a specific data core. 543 * 544 */ 545 static int 546 spdk_vhost_blk_start(struct spdk_vhost_dev *vdev, void *event_ctx) 547 { 548 struct spdk_vhost_blk_dev *bvdev; 549 int i, rc = 0; 550 551 bvdev = to_blk_dev(vdev); 552 if (bvdev == NULL) { 553 SPDK_ERRLOG("Trying to start non-blk controller as a blk one.\n"); 554 rc = -1; 555 goto out; 556 } 557 558 /* validate all I/O queues are in a contiguous index range */ 559 for (i = 0; i < vdev->max_queues; i++) { 560 if (vdev->virtqueue[i].vring.desc == NULL) { 561 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vdev->name, i); 562 rc = -1; 563 goto out; 564 } 565 } 566 567 rc = alloc_task_pool(bvdev); 568 if (rc != 0) { 569 SPDK_ERRLOG("%s: failed to alloc task pool.\n", bvdev->vdev.name); 570 goto out; 571 } 572 573 if (bvdev->bdev) { 574 bvdev->bdev_io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 575 if (!bvdev->bdev_io_channel) { 576 free_task_pool(bvdev); 577 SPDK_ERRLOG("Controller %s: IO channel allocation failed\n", vdev->name); 578 rc = -1; 579 goto out; 580 } 581 } 582 583 bvdev->requestq_poller = spdk_poller_register(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 584 bvdev, 0); 585 SPDK_INFOLOG(SPDK_LOG_VHOST, "Started poller for vhost controller %s on lcore %d\n", 586 vdev->name, vdev->lcore); 587 out: 588 spdk_vhost_dev_backend_event_done(event_ctx, rc); 589 return rc; 590 } 591 592 static int 593 destroy_device_poller_cb(void *arg) 594 { 595 struct spdk_vhost_blk_dev *bvdev = arg; 596 int i; 597 598 if (bvdev->vdev.task_cnt > 0) { 599 return -1; 600 } 601 602 for (i = 0; i < bvdev->vdev.max_queues; i++) { 603 bvdev->vdev.virtqueue[i].next_event_time = 0; 604 spdk_vhost_vq_used_signal(&bvdev->vdev, &bvdev->vdev.virtqueue[i]); 605 } 606 607 SPDK_INFOLOG(SPDK_LOG_VHOST, "Stopping poller for vhost controller %s\n", bvdev->vdev.name); 608 609 if (bvdev->bdev_io_channel) { 610 spdk_put_io_channel(bvdev->bdev_io_channel); 611 bvdev->bdev_io_channel = NULL; 612 } 613 614 free_task_pool(bvdev); 615 spdk_poller_unregister(&bvdev->destroy_ctx.poller); 616 spdk_vhost_dev_backend_event_done(bvdev->destroy_ctx.event_ctx, 0); 617 618 return -1; 619 } 620 621 static int 622 spdk_vhost_blk_stop(struct spdk_vhost_dev *vdev, void *event_ctx) 623 { 624 struct spdk_vhost_blk_dev *bvdev; 625 626 bvdev = to_blk_dev(vdev); 627 if (bvdev == NULL) { 628 SPDK_ERRLOG("Trying to stop non-blk controller as a blk one.\n"); 629 goto err; 630 } 631 632 bvdev->destroy_ctx.event_ctx = event_ctx; 633 spdk_poller_unregister(&bvdev->requestq_poller); 634 bvdev->destroy_ctx.poller = spdk_poller_register(destroy_device_poller_cb, 635 bvdev, 1000); 636 return 0; 637 638 err: 639 spdk_vhost_dev_backend_event_done(event_ctx, -1); 640 return -1; 641 } 642 643 static void 644 spdk_vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 645 { 646 struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); 647 struct spdk_vhost_blk_dev *bvdev; 648 649 bvdev = to_blk_dev(vdev); 650 if (bvdev == NULL) { 651 return; 652 } 653 654 assert(bvdev != NULL); 655 spdk_json_write_name(w, "block"); 656 spdk_json_write_object_begin(w); 657 658 spdk_json_write_name(w, "readonly"); 659 spdk_json_write_bool(w, bvdev->readonly); 660 661 spdk_json_write_name(w, "bdev"); 662 if (bdev) { 663 spdk_json_write_string(w, spdk_bdev_get_name(bdev)); 664 } else { 665 spdk_json_write_null(w); 666 } 667 668 spdk_json_write_object_end(w); 669 } 670 671 static void 672 spdk_vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 673 { 674 struct spdk_vhost_blk_dev *bvdev; 675 676 bvdev = to_blk_dev(vdev); 677 if (bvdev == NULL) { 678 return; 679 } 680 681 if (!bvdev->bdev) { 682 return; 683 } 684 685 spdk_json_write_object_begin(w); 686 spdk_json_write_named_string(w, "method", "construct_vhost_blk_controller"); 687 688 spdk_json_write_named_object_begin(w, "params"); 689 spdk_json_write_named_string(w, "ctrlr", vdev->name); 690 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 691 spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(vdev->cpumask)); 692 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 693 spdk_json_write_object_end(w); 694 695 spdk_json_write_object_end(w); 696 } 697 698 static int spdk_vhost_blk_destroy(struct spdk_vhost_dev *dev); 699 700 static int 701 spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 702 uint32_t len) 703 { 704 struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config; 705 struct spdk_vhost_blk_dev *bvdev; 706 struct spdk_bdev *bdev; 707 uint32_t blk_size; 708 uint64_t blkcnt; 709 710 bvdev = to_blk_dev(vdev); 711 if (bvdev == NULL) { 712 SPDK_ERRLOG("Trying to get virito_blk configuration failed\n"); 713 return -1; 714 } 715 716 if (len < sizeof(*blkcfg)) { 717 return -1; 718 } 719 720 bdev = bvdev->bdev; 721 if (bdev == NULL) { 722 /* We can't just return -1 here as this GET_CONFIG message might 723 * be caused by a QEMU VM reboot. Returning -1 will indicate an 724 * error to QEMU, who might then decide to terminate itself. 725 * We don't want that. A simple reboot shouldn't break the system. 726 * 727 * Presenting a block device with block size 0 and block count 0 728 * doesn't cause any problems on QEMU side and the virtio-pci 729 * device is even still available inside the VM, but there will 730 * be no block device created for it - the kernel drivers will 731 * silently reject it. 732 */ 733 blk_size = 0; 734 blkcnt = 0; 735 } else { 736 blk_size = spdk_bdev_get_block_size(bdev); 737 blkcnt = spdk_bdev_get_num_blocks(bdev); 738 } 739 740 memset(blkcfg, 0, sizeof(*blkcfg)); 741 blkcfg->blk_size = blk_size; 742 /* minimum I/O size in blocks */ 743 blkcfg->min_io_size = 1; 744 /* expressed in 512 Bytes sectors */ 745 blkcfg->capacity = (blkcnt * blk_size) / 512; 746 blkcfg->size_max = 131072; 747 /* -2 for REQ and RESP and -1 for region boundary splitting */ 748 blkcfg->seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 749 /* QEMU can overwrite this value when started */ 750 blkcfg->num_queues = SPDK_VHOST_MAX_VQUEUES; 751 752 return 0; 753 } 754 755 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 756 .virtio_features = SPDK_VHOST_FEATURES | 757 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | 758 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_RO) | 759 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | (1ULL << VIRTIO_BLK_F_TOPOLOGY) | 760 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | 761 (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 762 (1ULL << VIRTIO_BLK_F_MQ), 763 .disabled_features = SPDK_VHOST_DISABLED_FEATURES | (1ULL << VIRTIO_BLK_F_GEOMETRY) | 764 (1ULL << VIRTIO_BLK_F_RO) | (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 765 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI), 766 .start_device = spdk_vhost_blk_start, 767 .stop_device = spdk_vhost_blk_stop, 768 .vhost_get_config = spdk_vhost_blk_get_config, 769 .dump_info_json = spdk_vhost_blk_dump_info_json, 770 .write_config_json = spdk_vhost_blk_write_config_json, 771 .remove_device = spdk_vhost_blk_destroy, 772 }; 773 774 int 775 spdk_vhost_blk_controller_construct(void) 776 { 777 struct spdk_conf_section *sp; 778 unsigned ctrlr_num; 779 char *bdev_name; 780 char *cpumask; 781 char *name; 782 bool readonly; 783 784 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 785 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 786 continue; 787 } 788 789 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 790 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 791 spdk_conf_section_get_name(sp)); 792 return -1; 793 } 794 795 name = spdk_conf_section_get_val(sp, "Name"); 796 if (name == NULL) { 797 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 798 return -1; 799 } 800 801 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 802 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 803 804 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 805 if (bdev_name == NULL) { 806 continue; 807 } 808 809 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, readonly) < 0) { 810 return -1; 811 } 812 } 813 814 return 0; 815 } 816 817 int 818 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, bool readonly) 819 { 820 struct spdk_vhost_blk_dev *bvdev = NULL; 821 struct spdk_bdev *bdev; 822 int ret = 0; 823 824 spdk_vhost_lock(); 825 bdev = spdk_bdev_get_by_name(dev_name); 826 if (bdev == NULL) { 827 SPDK_ERRLOG("Controller %s: bdev '%s' not found\n", 828 name, dev_name); 829 ret = -ENODEV; 830 goto out; 831 } 832 833 bvdev = spdk_dma_zmalloc(sizeof(*bvdev), SPDK_CACHE_LINE_SIZE, NULL); 834 if (bvdev == NULL) { 835 ret = -ENOMEM; 836 goto out; 837 } 838 839 ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); 840 if (ret != 0) { 841 SPDK_ERRLOG("Controller %s: could not open bdev '%s', error=%d\n", 842 name, dev_name, ret); 843 goto out; 844 } 845 846 bvdev->bdev = bdev; 847 bvdev->readonly = readonly; 848 ret = spdk_vhost_dev_register(&bvdev->vdev, name, cpumask, &vhost_blk_device_backend); 849 if (ret != 0) { 850 spdk_bdev_close(bvdev->bdev_desc); 851 goto out; 852 } 853 854 if (readonly && rte_vhost_driver_enable_features(bvdev->vdev.path, (1ULL << VIRTIO_BLK_F_RO))) { 855 SPDK_ERRLOG("Controller %s: failed to set as a readonly\n", name); 856 spdk_bdev_close(bvdev->bdev_desc); 857 858 if (spdk_vhost_dev_unregister(&bvdev->vdev) != 0) { 859 SPDK_ERRLOG("Controller %s: failed to remove controller\n", name); 860 } 861 862 ret = -1; 863 goto out; 864 } 865 866 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: using bdev '%s'\n", name, dev_name); 867 out: 868 if (ret != 0 && bvdev) { 869 spdk_dma_free(bvdev); 870 } 871 spdk_vhost_unlock(); 872 return ret; 873 } 874 875 static int 876 spdk_vhost_blk_destroy(struct spdk_vhost_dev *vdev) 877 { 878 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 879 int rc; 880 881 if (!bvdev) { 882 return -EINVAL; 883 } 884 885 rc = spdk_vhost_dev_unregister(&bvdev->vdev); 886 if (rc != 0) { 887 return rc; 888 } 889 890 if (bvdev->bdev_desc) { 891 spdk_bdev_close(bvdev->bdev_desc); 892 bvdev->bdev_desc = NULL; 893 } 894 bvdev->bdev = NULL; 895 896 spdk_dma_free(bvdev); 897 return 0; 898 } 899 900 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 901 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 902