1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/thread.h" 40 #include "spdk/likely.h" 41 #include "spdk/string.h" 42 #include "spdk/util.h" 43 #include "spdk/vhost.h" 44 45 #include "vhost_internal.h" 46 47 struct spdk_vhost_blk_task { 48 struct spdk_bdev_io *bdev_io; 49 struct spdk_vhost_blk_dev *bvdev; 50 struct spdk_vhost_virtqueue *vq; 51 52 volatile uint8_t *status; 53 54 uint16_t req_idx; 55 56 /* for io wait */ 57 struct spdk_bdev_io_wait_entry bdev_io_wait; 58 59 /* If set, the task is currently used for I/O processing. */ 60 bool used; 61 62 /** Number of bytes that were written. */ 63 uint32_t used_len; 64 uint16_t iovcnt; 65 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 66 }; 67 68 struct spdk_vhost_blk_dev { 69 struct spdk_vhost_dev vdev; 70 struct spdk_bdev *bdev; 71 struct spdk_bdev_desc *bdev_desc; 72 struct spdk_io_channel *bdev_io_channel; 73 struct spdk_poller *requestq_poller; 74 struct spdk_vhost_dev_destroy_ctx destroy_ctx; 75 bool readonly; 76 }; 77 78 /* forward declaration */ 79 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 80 81 static int 82 process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, 83 struct spdk_vhost_virtqueue *vq); 84 85 static void 86 blk_task_finish(struct spdk_vhost_blk_task *task) 87 { 88 assert(task->bvdev->vdev.session->task_cnt > 0); 89 task->bvdev->vdev.session->task_cnt--; 90 task->used = false; 91 } 92 93 static void 94 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 95 { 96 if (task->status) { 97 *task->status = status; 98 } 99 100 spdk_vhost_vq_used_ring_enqueue(task->bvdev->vdev.session, task->vq, task->req_idx, 101 task->used_len); 102 blk_task_finish(task); 103 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 104 } 105 106 /* 107 * Process task's descriptor chain and setup data related fields. 108 * Return 109 * total size of suplied buffers 110 * 111 * FIXME: Make this function return to rd_cnt and wr_cnt 112 */ 113 static int 114 blk_iovs_setup(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq, uint16_t req_idx, 115 struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 116 { 117 struct vring_desc *desc, *desc_table; 118 uint16_t out_cnt = 0, cnt = 0; 119 uint32_t desc_table_size, len = 0; 120 int rc; 121 122 rc = spdk_vhost_vq_get_desc(vdev->session, vq, req_idx, &desc, &desc_table, &desc_table_size); 123 if (rc != 0) { 124 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 125 return -1; 126 } 127 128 while (1) { 129 /* 130 * Maximum cnt reached? 131 * Should not happen if request is well formatted, otherwise this is a BUG. 132 */ 133 if (spdk_unlikely(cnt == *iovs_cnt)) { 134 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Max IOVs in request reached (req_idx = %"PRIu16").\n", 135 req_idx); 136 return -1; 137 } 138 139 if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev->session, iovs, &cnt, desc))) { 140 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 141 req_idx, cnt); 142 return -1; 143 } 144 145 len += desc->len; 146 147 out_cnt += spdk_vhost_vring_desc_is_wr(desc); 148 149 rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 150 if (rc != 0) { 151 SPDK_ERRLOG("%s: Descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 152 vdev->name, req_idx); 153 return -1; 154 } else if (desc == NULL) { 155 break; 156 } 157 } 158 159 /* 160 * There must be least two descriptors. 161 * First contain request so it must be readable. 162 * Last descriptor contain buffer for response so it must be writable. 163 */ 164 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 165 return -1; 166 } 167 168 *length = len; 169 *iovs_cnt = cnt; 170 return 0; 171 } 172 173 static void 174 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 175 { 176 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 177 spdk_vhost_vq_used_ring_enqueue(task->bvdev->vdev.session, task->vq, task->req_idx, 178 task->used_len); 179 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 180 task->req_idx, success ? "OK" : "FAIL"); 181 blk_task_finish(task); 182 } 183 184 static void 185 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 186 { 187 struct spdk_vhost_blk_task *task = cb_arg; 188 189 spdk_bdev_free_io(bdev_io); 190 blk_request_finish(success, task); 191 } 192 193 static void 194 blk_request_resubmit(void *arg) 195 { 196 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 197 int rc = 0; 198 199 rc = process_blk_request(task, task->bvdev, task->vq); 200 if (rc == 0) { 201 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 202 } else { 203 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 204 } 205 } 206 207 static inline void 208 blk_request_queue_io(struct spdk_vhost_blk_task *task) 209 { 210 int rc; 211 struct spdk_vhost_blk_dev *bvdev = task->bvdev; 212 struct spdk_bdev *bdev = bvdev->bdev; 213 214 task->bdev_io_wait.bdev = bdev; 215 task->bdev_io_wait.cb_fn = blk_request_resubmit; 216 task->bdev_io_wait.cb_arg = task; 217 218 rc = spdk_bdev_queue_io_wait(bdev, bvdev->bdev_io_channel, &task->bdev_io_wait); 219 if (rc != 0) { 220 SPDK_ERRLOG("Queue io failed in vhost_blk, rc=%d\n", rc); 221 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 222 } 223 } 224 225 static int 226 process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, 227 struct spdk_vhost_virtqueue *vq) 228 { 229 const struct virtio_blk_outhdr *req; 230 struct virtio_blk_discard_write_zeroes *desc; 231 struct iovec *iov; 232 uint32_t type; 233 uint32_t payload_len; 234 int rc; 235 236 if (blk_iovs_setup(&bvdev->vdev, vq, task->req_idx, task->iovs, &task->iovcnt, &payload_len)) { 237 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 238 /* Only READ and WRITE are supported for now. */ 239 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 240 return -1; 241 } 242 243 iov = &task->iovs[0]; 244 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 245 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 246 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 247 iov->iov_len, sizeof(*req), task->req_idx); 248 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 249 return -1; 250 } 251 252 req = iov->iov_base; 253 254 iov = &task->iovs[task->iovcnt - 1]; 255 if (spdk_unlikely(iov->iov_len != 1)) { 256 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 257 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 258 iov->iov_len, 1, task->req_idx); 259 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 260 return -1; 261 } 262 263 task->status = iov->iov_base; 264 payload_len -= sizeof(*req) + sizeof(*task->status); 265 task->iovcnt -= 2; 266 267 type = req->type; 268 #ifdef VIRTIO_BLK_T_BARRIER 269 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 270 type &= ~VIRTIO_BLK_T_BARRIER; 271 #endif 272 273 switch (type) { 274 case VIRTIO_BLK_T_IN: 275 case VIRTIO_BLK_T_OUT: 276 if (spdk_unlikely((payload_len & (512 - 1)) != 0)) { 277 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 278 type ? "WRITE" : "READ", task->req_idx); 279 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 280 return -1; 281 } 282 283 if (type == VIRTIO_BLK_T_IN) { 284 task->used_len = payload_len + sizeof(*task->status); 285 rc = spdk_bdev_readv(bvdev->bdev_desc, bvdev->bdev_io_channel, 286 &task->iovs[1], task->iovcnt, req->sector * 512, 287 payload_len, blk_request_complete_cb, task); 288 } else if (!bvdev->readonly) { 289 task->used_len = sizeof(*task->status); 290 rc = spdk_bdev_writev(bvdev->bdev_desc, bvdev->bdev_io_channel, 291 &task->iovs[1], task->iovcnt, req->sector * 512, 292 payload_len, blk_request_complete_cb, task); 293 } else { 294 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 295 rc = -1; 296 } 297 298 if (rc) { 299 if (rc == -ENOMEM) { 300 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 301 blk_request_queue_io(task); 302 } else { 303 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 304 return -1; 305 } 306 } 307 break; 308 case VIRTIO_BLK_T_DISCARD: 309 desc = task->iovs[1].iov_base; 310 if (payload_len != sizeof(*desc)) { 311 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 312 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 313 return -1; 314 } 315 316 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvdev->bdev_io_channel, 317 desc->sector * 512, desc->num_sectors * 512, 318 blk_request_complete_cb, task); 319 if (rc) { 320 if (rc == -ENOMEM) { 321 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 322 blk_request_queue_io(task); 323 } else { 324 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 325 return -1; 326 } 327 } 328 break; 329 case VIRTIO_BLK_T_WRITE_ZEROES: 330 desc = task->iovs[1].iov_base; 331 if (payload_len != sizeof(*desc)) { 332 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 333 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 334 return -1; 335 } 336 337 /* Zeroed and Unmap the range, SPDK doen't support it. */ 338 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 339 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 340 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 341 return -1; 342 } 343 344 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvdev->bdev_io_channel, 345 desc->sector * 512, desc->num_sectors * 512, 346 blk_request_complete_cb, task); 347 if (rc) { 348 if (rc == -ENOMEM) { 349 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 350 blk_request_queue_io(task); 351 } else { 352 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 353 return -1; 354 } 355 } 356 break; 357 case VIRTIO_BLK_T_GET_ID: 358 if (!task->iovcnt || !payload_len) { 359 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 360 return -1; 361 } 362 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 363 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 364 task->used_len, ' '); 365 blk_request_finish(true, task); 366 break; 367 default: 368 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 369 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 370 return -1; 371 } 372 373 return 0; 374 } 375 376 static void 377 process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) 378 { 379 struct spdk_vhost_blk_task *task; 380 struct spdk_vhost_session *vsession = bvdev->vdev.session; 381 int rc; 382 uint16_t reqs[32]; 383 uint16_t reqs_cnt, i; 384 385 reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 386 if (!reqs_cnt) { 387 return; 388 } 389 390 for (i = 0; i < reqs_cnt; i++) { 391 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 392 reqs[i]); 393 394 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 395 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 396 bvdev->vdev.name, reqs[i], vq->vring.size); 397 spdk_vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 398 continue; 399 } 400 401 task = &((struct spdk_vhost_blk_task *)vq->tasks)[reqs[i]]; 402 if (spdk_unlikely(task->used)) { 403 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 404 bvdev->vdev.name, reqs[i]); 405 spdk_vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 406 continue; 407 } 408 409 vsession->task_cnt++; 410 411 task->used = true; 412 task->iovcnt = SPDK_COUNTOF(task->iovs); 413 task->status = NULL; 414 task->used_len = 0; 415 416 rc = process_blk_request(task, bvdev, vq); 417 if (rc == 0) { 418 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 419 reqs[i]); 420 } else { 421 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, reqs[i]); 422 } 423 } 424 } 425 426 static int 427 vdev_worker(void *arg) 428 { 429 struct spdk_vhost_blk_dev *bvdev = arg; 430 struct spdk_vhost_session *vsession = bvdev->vdev.session; 431 uint16_t q_idx; 432 433 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 434 process_vq(bvdev, &vsession->virtqueue[q_idx]); 435 } 436 437 spdk_vhost_session_used_signal(vsession); 438 439 return -1; 440 } 441 442 static void 443 no_bdev_process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) 444 { 445 struct spdk_vhost_session *vsession = bvdev->vdev.session; 446 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 447 uint32_t length; 448 uint16_t iovcnt, req_idx; 449 450 if (spdk_vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 451 return; 452 } 453 454 iovcnt = SPDK_COUNTOF(iovs); 455 if (blk_iovs_setup(&bvdev->vdev, vq, req_idx, iovs, &iovcnt, &length) == 0) { 456 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 457 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 458 } 459 460 spdk_vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 461 } 462 463 static int 464 no_bdev_vdev_worker(void *arg) 465 { 466 struct spdk_vhost_blk_dev *bvdev = arg; 467 struct spdk_vhost_session *vsession = bvdev->vdev.session; 468 uint16_t q_idx; 469 470 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 471 no_bdev_process_vq(bvdev, &vsession->virtqueue[q_idx]); 472 } 473 474 spdk_vhost_session_used_signal(vsession); 475 476 if (vsession->task_cnt == 0 && bvdev->bdev_io_channel) { 477 spdk_put_io_channel(bvdev->bdev_io_channel); 478 bvdev->bdev_io_channel = NULL; 479 } 480 481 return -1; 482 } 483 484 static struct spdk_vhost_blk_dev * 485 to_blk_dev(struct spdk_vhost_dev *vdev) 486 { 487 if (vdev == NULL) { 488 return NULL; 489 } 490 491 if (vdev->backend != &vhost_blk_device_backend) { 492 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 493 return NULL; 494 } 495 496 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 497 } 498 499 struct spdk_bdev * 500 spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) 501 { 502 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 503 504 assert(bvdev != NULL); 505 return bvdev->bdev; 506 } 507 508 static int 509 _bdev_remove_cb(struct spdk_vhost_dev *vdev, void *arg) 510 { 511 struct spdk_vhost_blk_dev *bvdev = arg; 512 513 SPDK_WARNLOG("Controller %s: Hot-removing bdev - all further requests will fail.\n", 514 bvdev->vdev.name); 515 if (bvdev->requestq_poller) { 516 spdk_poller_unregister(&bvdev->requestq_poller); 517 bvdev->requestq_poller = spdk_poller_register(no_bdev_vdev_worker, bvdev, 0); 518 } 519 520 spdk_bdev_close(bvdev->bdev_desc); 521 bvdev->bdev_desc = NULL; 522 bvdev->bdev = NULL; 523 return 0; 524 } 525 526 static void 527 bdev_remove_cb(void *remove_ctx) 528 { 529 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 530 531 spdk_vhost_call_external_event(bvdev->vdev.name, _bdev_remove_cb, bvdev); 532 } 533 534 static void 535 free_task_pool(struct spdk_vhost_blk_dev *bvdev) 536 { 537 struct spdk_vhost_session *vsession = bvdev->vdev.session; 538 struct spdk_vhost_virtqueue *vq; 539 uint16_t i; 540 541 for (i = 0; i < vsession->max_queues; i++) { 542 vq = &vsession->virtqueue[i]; 543 if (vq->tasks == NULL) { 544 continue; 545 } 546 547 spdk_dma_free(vq->tasks); 548 vq->tasks = NULL; 549 } 550 } 551 552 static int 553 alloc_task_pool(struct spdk_vhost_blk_dev *bvdev) 554 { 555 struct spdk_vhost_session *vsession = bvdev->vdev.session; 556 struct spdk_vhost_virtqueue *vq; 557 struct spdk_vhost_blk_task *task; 558 uint32_t task_cnt; 559 uint16_t i; 560 uint32_t j; 561 562 for (i = 0; i < vsession->max_queues; i++) { 563 vq = &vsession->virtqueue[i]; 564 if (vq->vring.desc == NULL) { 565 continue; 566 } 567 568 task_cnt = vq->vring.size; 569 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 570 /* sanity check */ 571 SPDK_ERRLOG("Controller %s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 572 bvdev->vdev.name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 573 free_task_pool(bvdev); 574 return -1; 575 } 576 vq->tasks = spdk_dma_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 577 SPDK_CACHE_LINE_SIZE, NULL); 578 if (vq->tasks == NULL) { 579 SPDK_ERRLOG("Controller %s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 580 bvdev->vdev.name, task_cnt, i); 581 free_task_pool(bvdev); 582 return -1; 583 } 584 585 for (j = 0; j < task_cnt; j++) { 586 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 587 task->bvdev = bvdev; 588 task->req_idx = j; 589 task->vq = vq; 590 } 591 } 592 593 return 0; 594 } 595 596 /* 597 * A new device is added to a data core. First the device is added to the main linked list 598 * and then allocated to a specific data core. 599 * 600 */ 601 static int 602 spdk_vhost_blk_start(struct spdk_vhost_dev *vdev, void *event_ctx) 603 { 604 struct spdk_vhost_blk_dev *bvdev; 605 struct spdk_vhost_session *vsession = vdev->session; 606 int i, rc = 0; 607 608 bvdev = to_blk_dev(vdev); 609 if (bvdev == NULL) { 610 SPDK_ERRLOG("Trying to start non-blk controller as a blk one.\n"); 611 rc = -1; 612 goto out; 613 } 614 615 /* validate all I/O queues are in a contiguous index range */ 616 for (i = 0; i < vsession->max_queues; i++) { 617 if (vsession->virtqueue[i].vring.desc == NULL) { 618 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vdev->name, i); 619 rc = -1; 620 goto out; 621 } 622 } 623 624 rc = alloc_task_pool(bvdev); 625 if (rc != 0) { 626 SPDK_ERRLOG("%s: failed to alloc task pool.\n", bvdev->vdev.name); 627 goto out; 628 } 629 630 if (bvdev->bdev) { 631 bvdev->bdev_io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 632 if (!bvdev->bdev_io_channel) { 633 free_task_pool(bvdev); 634 SPDK_ERRLOG("Controller %s: IO channel allocation failed\n", vdev->name); 635 rc = -1; 636 goto out; 637 } 638 } 639 640 bvdev->requestq_poller = spdk_poller_register(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 641 bvdev, 0); 642 SPDK_INFOLOG(SPDK_LOG_VHOST, "Started poller for vhost controller %s on lcore %d\n", 643 vdev->name, vdev->lcore); 644 out: 645 spdk_vhost_dev_backend_event_done(event_ctx, rc); 646 return rc; 647 } 648 649 static int 650 destroy_device_poller_cb(void *arg) 651 { 652 struct spdk_vhost_blk_dev *bvdev = arg; 653 struct spdk_vhost_session *vsession = bvdev->vdev.session; 654 int i; 655 656 if (vsession->task_cnt > 0) { 657 return -1; 658 } 659 660 for (i = 0; i < vsession->max_queues; i++) { 661 vsession->virtqueue[i].next_event_time = 0; 662 spdk_vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 663 } 664 665 SPDK_INFOLOG(SPDK_LOG_VHOST, "Stopping poller for vhost controller %s\n", bvdev->vdev.name); 666 667 if (bvdev->bdev_io_channel) { 668 spdk_put_io_channel(bvdev->bdev_io_channel); 669 bvdev->bdev_io_channel = NULL; 670 } 671 672 free_task_pool(bvdev); 673 spdk_poller_unregister(&bvdev->destroy_ctx.poller); 674 spdk_vhost_dev_backend_event_done(bvdev->destroy_ctx.event_ctx, 0); 675 676 return -1; 677 } 678 679 static int 680 spdk_vhost_blk_stop(struct spdk_vhost_dev *vdev, void *event_ctx) 681 { 682 struct spdk_vhost_blk_dev *bvdev; 683 684 bvdev = to_blk_dev(vdev); 685 if (bvdev == NULL) { 686 SPDK_ERRLOG("Trying to stop non-blk controller as a blk one.\n"); 687 goto err; 688 } 689 690 bvdev->destroy_ctx.event_ctx = event_ctx; 691 spdk_poller_unregister(&bvdev->requestq_poller); 692 bvdev->destroy_ctx.poller = spdk_poller_register(destroy_device_poller_cb, 693 bvdev, 1000); 694 return 0; 695 696 err: 697 spdk_vhost_dev_backend_event_done(event_ctx, -1); 698 return -1; 699 } 700 701 static void 702 spdk_vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 703 { 704 struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); 705 struct spdk_vhost_blk_dev *bvdev; 706 707 bvdev = to_blk_dev(vdev); 708 if (bvdev == NULL) { 709 return; 710 } 711 712 assert(bvdev != NULL); 713 spdk_json_write_name(w, "block"); 714 spdk_json_write_object_begin(w); 715 716 spdk_json_write_name(w, "readonly"); 717 spdk_json_write_bool(w, bvdev->readonly); 718 719 spdk_json_write_name(w, "bdev"); 720 if (bdev) { 721 spdk_json_write_string(w, spdk_bdev_get_name(bdev)); 722 } else { 723 spdk_json_write_null(w); 724 } 725 726 spdk_json_write_object_end(w); 727 } 728 729 static void 730 spdk_vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 731 { 732 struct spdk_vhost_blk_dev *bvdev; 733 734 bvdev = to_blk_dev(vdev); 735 if (bvdev == NULL) { 736 return; 737 } 738 739 if (!bvdev->bdev) { 740 return; 741 } 742 743 spdk_json_write_object_begin(w); 744 spdk_json_write_named_string(w, "method", "construct_vhost_blk_controller"); 745 746 spdk_json_write_named_object_begin(w, "params"); 747 spdk_json_write_named_string(w, "ctrlr", vdev->name); 748 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 749 spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(vdev->cpumask)); 750 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 751 spdk_json_write_object_end(w); 752 753 spdk_json_write_object_end(w); 754 } 755 756 static int spdk_vhost_blk_destroy(struct spdk_vhost_dev *dev); 757 758 static int 759 spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 760 uint32_t len) 761 { 762 struct virtio_blk_config blkcfg; 763 struct spdk_vhost_blk_dev *bvdev; 764 struct spdk_bdev *bdev; 765 uint32_t blk_size; 766 uint64_t blkcnt; 767 768 bvdev = to_blk_dev(vdev); 769 if (bvdev == NULL) { 770 SPDK_ERRLOG("Trying to get virito_blk configuration failed\n"); 771 return -1; 772 } 773 774 bdev = bvdev->bdev; 775 if (bdev == NULL) { 776 /* We can't just return -1 here as this GET_CONFIG message might 777 * be caused by a QEMU VM reboot. Returning -1 will indicate an 778 * error to QEMU, who might then decide to terminate itself. 779 * We don't want that. A simple reboot shouldn't break the system. 780 * 781 * Presenting a block device with block size 0 and block count 0 782 * doesn't cause any problems on QEMU side and the virtio-pci 783 * device is even still available inside the VM, but there will 784 * be no block device created for it - the kernel drivers will 785 * silently reject it. 786 */ 787 blk_size = 0; 788 blkcnt = 0; 789 } else { 790 blk_size = spdk_bdev_get_block_size(bdev); 791 blkcnt = spdk_bdev_get_num_blocks(bdev); 792 } 793 794 memset(&blkcfg, 0, sizeof(blkcfg)); 795 blkcfg.blk_size = blk_size; 796 /* minimum I/O size in blocks */ 797 blkcfg.min_io_size = 1; 798 /* expressed in 512 Bytes sectors */ 799 blkcfg.capacity = (blkcnt * blk_size) / 512; 800 blkcfg.size_max = 131072; 801 /* -2 for REQ and RESP and -1 for region boundary splitting */ 802 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 803 /* QEMU can overwrite this value when started */ 804 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 805 806 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 807 /* 16MiB, expressed in 512 Bytes */ 808 blkcfg.max_discard_sectors = 32768; 809 blkcfg.max_discard_seg = 1; 810 blkcfg.discard_sector_alignment = blk_size / 512; 811 } 812 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 813 blkcfg.max_write_zeroes_sectors = 32768; 814 blkcfg.max_write_zeroes_seg = 1; 815 } 816 817 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 818 819 return 0; 820 } 821 822 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 823 .virtio_features = SPDK_VHOST_FEATURES | 824 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | 825 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_RO) | 826 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | (1ULL << VIRTIO_BLK_F_TOPOLOGY) | 827 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | 828 (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 829 (1ULL << VIRTIO_BLK_F_MQ) | (1ULL << VIRTIO_BLK_F_DISCARD) | 830 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES), 831 .disabled_features = SPDK_VHOST_DISABLED_FEATURES | (1ULL << VIRTIO_BLK_F_GEOMETRY) | 832 (1ULL << VIRTIO_BLK_F_RO) | (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 833 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_DISCARD) | 834 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES), 835 .start_device = spdk_vhost_blk_start, 836 .stop_device = spdk_vhost_blk_stop, 837 .vhost_get_config = spdk_vhost_blk_get_config, 838 .dump_info_json = spdk_vhost_blk_dump_info_json, 839 .write_config_json = spdk_vhost_blk_write_config_json, 840 .remove_device = spdk_vhost_blk_destroy, 841 }; 842 843 int 844 spdk_vhost_blk_controller_construct(void) 845 { 846 struct spdk_conf_section *sp; 847 unsigned ctrlr_num; 848 char *bdev_name; 849 char *cpumask; 850 char *name; 851 bool readonly; 852 853 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 854 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 855 continue; 856 } 857 858 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 859 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 860 spdk_conf_section_get_name(sp)); 861 return -1; 862 } 863 864 name = spdk_conf_section_get_val(sp, "Name"); 865 if (name == NULL) { 866 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 867 return -1; 868 } 869 870 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 871 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 872 873 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 874 if (bdev_name == NULL) { 875 continue; 876 } 877 878 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, readonly) < 0) { 879 return -1; 880 } 881 } 882 883 return 0; 884 } 885 886 int 887 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, bool readonly) 888 { 889 struct spdk_vhost_blk_dev *bvdev = NULL; 890 struct spdk_bdev *bdev; 891 uint64_t features = 0; 892 int ret = 0; 893 894 spdk_vhost_lock(); 895 bdev = spdk_bdev_get_by_name(dev_name); 896 if (bdev == NULL) { 897 SPDK_ERRLOG("Controller %s: bdev '%s' not found\n", 898 name, dev_name); 899 ret = -ENODEV; 900 goto out; 901 } 902 903 bvdev = spdk_dma_zmalloc(sizeof(*bvdev), SPDK_CACHE_LINE_SIZE, NULL); 904 if (bvdev == NULL) { 905 ret = -ENOMEM; 906 goto out; 907 } 908 909 ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); 910 if (ret != 0) { 911 SPDK_ERRLOG("Controller %s: could not open bdev '%s', error=%d\n", 912 name, dev_name, ret); 913 goto out; 914 } 915 916 bvdev->bdev = bdev; 917 bvdev->readonly = readonly; 918 ret = spdk_vhost_dev_register(&bvdev->vdev, name, cpumask, &vhost_blk_device_backend); 919 if (ret != 0) { 920 spdk_bdev_close(bvdev->bdev_desc); 921 goto out; 922 } 923 924 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 925 features |= (1ULL << VIRTIO_BLK_F_DISCARD); 926 } 927 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 928 features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 929 } 930 if (readonly) { 931 features |= (1ULL << VIRTIO_BLK_F_RO); 932 } 933 934 if (features && rte_vhost_driver_enable_features(bvdev->vdev.path, features)) { 935 SPDK_ERRLOG("Controller %s: failed to enable features 0x%"PRIx64"\n", name, features); 936 937 if (spdk_vhost_dev_unregister(&bvdev->vdev) != 0) { 938 SPDK_ERRLOG("Controller %s: failed to remove controller\n", name); 939 } 940 941 spdk_bdev_close(bvdev->bdev_desc); 942 ret = -1; 943 goto out; 944 } 945 946 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: using bdev '%s'\n", name, dev_name); 947 out: 948 if (ret != 0 && bvdev) { 949 spdk_dma_free(bvdev); 950 } 951 spdk_vhost_unlock(); 952 return ret; 953 } 954 955 static int 956 spdk_vhost_blk_destroy(struct spdk_vhost_dev *vdev) 957 { 958 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 959 int rc; 960 961 if (!bvdev) { 962 return -EINVAL; 963 } 964 965 rc = spdk_vhost_dev_unregister(&bvdev->vdev); 966 if (rc != 0) { 967 return rc; 968 } 969 970 if (bvdev->bdev_desc) { 971 spdk_bdev_close(bvdev->bdev_desc); 972 bvdev->bdev_desc = NULL; 973 } 974 bvdev->bdev = NULL; 975 976 spdk_dma_free(bvdev); 977 return 0; 978 } 979 980 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 981 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 982