1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/thread.h" 40 #include "spdk/likely.h" 41 #include "spdk/string.h" 42 #include "spdk/util.h" 43 #include "spdk/vhost.h" 44 45 #include "vhost_internal.h" 46 47 struct spdk_vhost_blk_task { 48 struct spdk_bdev_io *bdev_io; 49 struct spdk_vhost_blk_session *bvsession; 50 struct spdk_vhost_virtqueue *vq; 51 52 volatile uint8_t *status; 53 54 uint16_t req_idx; 55 56 /* for io wait */ 57 struct spdk_bdev_io_wait_entry bdev_io_wait; 58 59 /* If set, the task is currently used for I/O processing. */ 60 bool used; 61 62 /** Number of bytes that were written. */ 63 uint32_t used_len; 64 uint16_t iovcnt; 65 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 66 }; 67 68 struct spdk_vhost_blk_dev { 69 struct spdk_vhost_dev vdev; 70 struct spdk_bdev *bdev; 71 struct spdk_bdev_desc *bdev_desc; 72 bool readonly; 73 }; 74 75 struct spdk_vhost_blk_session { 76 /* The parent session must be the very first field in this struct */ 77 struct spdk_vhost_session vsession; 78 struct spdk_vhost_blk_dev *bvdev; 79 struct spdk_poller *requestq_poller; 80 struct spdk_io_channel *io_channel; 81 struct spdk_vhost_dev_destroy_ctx destroy_ctx; 82 }; 83 84 /* forward declaration */ 85 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 86 87 static int 88 process_blk_request(struct spdk_vhost_blk_task *task, 89 struct spdk_vhost_blk_session *bvsession, 90 struct spdk_vhost_virtqueue *vq); 91 92 static void 93 blk_task_finish(struct spdk_vhost_blk_task *task) 94 { 95 assert(task->bvsession->vsession.task_cnt > 0); 96 task->bvsession->vsession.task_cnt--; 97 task->used = false; 98 } 99 100 static void 101 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 102 { 103 if (task->status) { 104 *task->status = status; 105 } 106 107 spdk_vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, task->req_idx, 108 task->used_len); 109 blk_task_finish(task); 110 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 111 } 112 113 /* 114 * Process task's descriptor chain and setup data related fields. 115 * Return 116 * total size of suplied buffers 117 * 118 * FIXME: Make this function return to rd_cnt and wr_cnt 119 */ 120 static int 121 blk_iovs_setup(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq, 122 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 123 { 124 struct spdk_vhost_session *vsession = &bvsession->vsession; 125 struct spdk_vhost_dev *vdev = vsession->vdev; 126 struct vring_desc *desc, *desc_table; 127 uint16_t out_cnt = 0, cnt = 0; 128 uint32_t desc_table_size, len = 0; 129 uint32_t desc_handled_cnt; 130 int rc; 131 132 rc = spdk_vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size); 133 if (rc != 0) { 134 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 135 return -1; 136 } 137 138 desc_handled_cnt = 0; 139 while (1) { 140 /* 141 * Maximum cnt reached? 142 * Should not happen if request is well formatted, otherwise this is a BUG. 143 */ 144 if (spdk_unlikely(cnt == *iovs_cnt)) { 145 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Max IOVs in request reached (req_idx = %"PRIu16").\n", 146 req_idx); 147 return -1; 148 } 149 150 if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) { 151 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 152 req_idx, cnt); 153 return -1; 154 } 155 156 len += desc->len; 157 158 out_cnt += spdk_vhost_vring_desc_is_wr(desc); 159 160 rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 161 if (rc != 0) { 162 SPDK_ERRLOG("%s: Descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 163 vdev->name, req_idx); 164 return -1; 165 } else if (desc == NULL) { 166 break; 167 } 168 169 desc_handled_cnt++; 170 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { 171 /* Break a cycle and report an error, if any. */ 172 SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n", 173 vdev->name, desc_table_size, desc_handled_cnt); 174 return -1; 175 } 176 } 177 178 /* 179 * There must be least two descriptors. 180 * First contain request so it must be readable. 181 * Last descriptor contain buffer for response so it must be writable. 182 */ 183 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 184 return -1; 185 } 186 187 *length = len; 188 *iovs_cnt = cnt; 189 return 0; 190 } 191 192 static void 193 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 194 { 195 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 196 spdk_vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, task->req_idx, 197 task->used_len); 198 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 199 task->req_idx, success ? "OK" : "FAIL"); 200 blk_task_finish(task); 201 } 202 203 static void 204 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 205 { 206 struct spdk_vhost_blk_task *task = cb_arg; 207 208 spdk_bdev_free_io(bdev_io); 209 blk_request_finish(success, task); 210 } 211 212 static void 213 blk_request_resubmit(void *arg) 214 { 215 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 216 int rc = 0; 217 218 rc = process_blk_request(task, task->bvsession, task->vq); 219 if (rc == 0) { 220 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 221 } else { 222 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 223 } 224 } 225 226 static inline void 227 blk_request_queue_io(struct spdk_vhost_blk_task *task) 228 { 229 int rc; 230 struct spdk_vhost_blk_session *bvsession = task->bvsession; 231 struct spdk_bdev *bdev = bvsession->bvdev->bdev; 232 233 task->bdev_io_wait.bdev = bdev; 234 task->bdev_io_wait.cb_fn = blk_request_resubmit; 235 task->bdev_io_wait.cb_arg = task; 236 237 rc = spdk_bdev_queue_io_wait(bdev, bvsession->io_channel, &task->bdev_io_wait); 238 if (rc != 0) { 239 SPDK_ERRLOG("Queue io failed in vhost_blk, rc=%d\n", rc); 240 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 241 } 242 } 243 244 static int 245 process_blk_request(struct spdk_vhost_blk_task *task, 246 struct spdk_vhost_blk_session *bvsession, 247 struct spdk_vhost_virtqueue *vq) 248 { 249 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 250 const struct virtio_blk_outhdr *req; 251 struct virtio_blk_discard_write_zeroes *desc; 252 struct iovec *iov; 253 uint32_t type; 254 uint32_t payload_len; 255 uint64_t flush_bytes; 256 int rc; 257 258 if (blk_iovs_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, &payload_len)) { 259 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 260 /* Only READ and WRITE are supported for now. */ 261 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 262 return -1; 263 } 264 265 iov = &task->iovs[0]; 266 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 267 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 268 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 269 iov->iov_len, sizeof(*req), task->req_idx); 270 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 271 return -1; 272 } 273 274 req = iov->iov_base; 275 276 iov = &task->iovs[task->iovcnt - 1]; 277 if (spdk_unlikely(iov->iov_len != 1)) { 278 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 279 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 280 iov->iov_len, 1, task->req_idx); 281 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 282 return -1; 283 } 284 285 task->status = iov->iov_base; 286 payload_len -= sizeof(*req) + sizeof(*task->status); 287 task->iovcnt -= 2; 288 289 type = req->type; 290 #ifdef VIRTIO_BLK_T_BARRIER 291 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 292 type &= ~VIRTIO_BLK_T_BARRIER; 293 #endif 294 295 switch (type) { 296 case VIRTIO_BLK_T_IN: 297 case VIRTIO_BLK_T_OUT: 298 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { 299 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 300 type ? "WRITE" : "READ", task->req_idx); 301 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 302 return -1; 303 } 304 305 if (type == VIRTIO_BLK_T_IN) { 306 task->used_len = payload_len + sizeof(*task->status); 307 rc = spdk_bdev_readv(bvdev->bdev_desc, bvsession->io_channel, 308 &task->iovs[1], task->iovcnt, req->sector * 512, 309 payload_len, blk_request_complete_cb, task); 310 } else if (!bvdev->readonly) { 311 task->used_len = sizeof(*task->status); 312 rc = spdk_bdev_writev(bvdev->bdev_desc, bvsession->io_channel, 313 &task->iovs[1], task->iovcnt, req->sector * 512, 314 payload_len, blk_request_complete_cb, task); 315 } else { 316 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 317 rc = -1; 318 } 319 320 if (rc) { 321 if (rc == -ENOMEM) { 322 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 323 blk_request_queue_io(task); 324 } else { 325 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 326 return -1; 327 } 328 } 329 break; 330 case VIRTIO_BLK_T_DISCARD: 331 desc = task->iovs[1].iov_base; 332 if (payload_len != sizeof(*desc)) { 333 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 334 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 335 return -1; 336 } 337 338 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvsession->io_channel, 339 desc->sector * 512, desc->num_sectors * 512, 340 blk_request_complete_cb, task); 341 if (rc) { 342 if (rc == -ENOMEM) { 343 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 344 blk_request_queue_io(task); 345 } else { 346 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 347 return -1; 348 } 349 } 350 break; 351 case VIRTIO_BLK_T_WRITE_ZEROES: 352 desc = task->iovs[1].iov_base; 353 if (payload_len != sizeof(*desc)) { 354 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 355 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 356 return -1; 357 } 358 359 /* Zeroed and Unmap the range, SPDK doen't support it. */ 360 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 361 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 362 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 363 return -1; 364 } 365 366 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvsession->io_channel, 367 desc->sector * 512, desc->num_sectors * 512, 368 blk_request_complete_cb, task); 369 if (rc) { 370 if (rc == -ENOMEM) { 371 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 372 blk_request_queue_io(task); 373 } else { 374 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 375 return -1; 376 } 377 } 378 break; 379 case VIRTIO_BLK_T_FLUSH: 380 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev); 381 if (req->sector != 0) { 382 SPDK_NOTICELOG("sector must be zero for flush command\n"); 383 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 384 return -1; 385 } 386 rc = spdk_bdev_flush(bvdev->bdev_desc, bvsession->io_channel, 387 0, flush_bytes, 388 blk_request_complete_cb, task); 389 if (rc) { 390 if (rc == -ENOMEM) { 391 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 392 blk_request_queue_io(task); 393 } else { 394 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 395 return -1; 396 } 397 } 398 break; 399 case VIRTIO_BLK_T_GET_ID: 400 if (!task->iovcnt || !payload_len) { 401 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 402 return -1; 403 } 404 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 405 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 406 task->used_len, ' '); 407 blk_request_finish(true, task); 408 break; 409 default: 410 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 411 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 412 return -1; 413 } 414 415 return 0; 416 } 417 418 static void 419 process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 420 { 421 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 422 struct spdk_vhost_blk_task *task; 423 struct spdk_vhost_session *vsession = &bvsession->vsession; 424 int rc; 425 uint16_t reqs[32]; 426 uint16_t reqs_cnt, i; 427 428 reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 429 if (!reqs_cnt) { 430 return; 431 } 432 433 for (i = 0; i < reqs_cnt; i++) { 434 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 435 reqs[i]); 436 437 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 438 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 439 bvdev->vdev.name, reqs[i], vq->vring.size); 440 spdk_vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 441 continue; 442 } 443 444 task = &((struct spdk_vhost_blk_task *)vq->tasks)[reqs[i]]; 445 if (spdk_unlikely(task->used)) { 446 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 447 bvdev->vdev.name, reqs[i]); 448 spdk_vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 449 continue; 450 } 451 452 vsession->task_cnt++; 453 454 task->used = true; 455 task->iovcnt = SPDK_COUNTOF(task->iovs); 456 task->status = NULL; 457 task->used_len = 0; 458 459 rc = process_blk_request(task, bvsession, vq); 460 if (rc == 0) { 461 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 462 reqs[i]); 463 } else { 464 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, reqs[i]); 465 } 466 } 467 } 468 469 static int 470 vdev_worker(void *arg) 471 { 472 struct spdk_vhost_blk_session *bvsession = arg; 473 struct spdk_vhost_session *vsession = &bvsession->vsession; 474 475 uint16_t q_idx; 476 477 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 478 process_vq(bvsession, &vsession->virtqueue[q_idx]); 479 } 480 481 spdk_vhost_session_used_signal(vsession); 482 483 return -1; 484 } 485 486 static void 487 no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 488 { 489 struct spdk_vhost_session *vsession = &bvsession->vsession; 490 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 491 uint32_t length; 492 uint16_t iovcnt, req_idx; 493 494 if (spdk_vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 495 return; 496 } 497 498 iovcnt = SPDK_COUNTOF(iovs); 499 if (blk_iovs_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) { 500 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 501 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 502 } 503 504 spdk_vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 505 } 506 507 static int 508 no_bdev_vdev_worker(void *arg) 509 { 510 struct spdk_vhost_blk_session *bvsession = arg; 511 struct spdk_vhost_session *vsession = &bvsession->vsession; 512 uint16_t q_idx; 513 514 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 515 no_bdev_process_vq(bvsession, &vsession->virtqueue[q_idx]); 516 } 517 518 spdk_vhost_session_used_signal(vsession); 519 520 if (vsession->task_cnt == 0 && bvsession->io_channel) { 521 spdk_put_io_channel(bvsession->io_channel); 522 bvsession->io_channel = NULL; 523 } 524 525 return -1; 526 } 527 528 static struct spdk_vhost_blk_session * 529 to_blk_session(struct spdk_vhost_session *vsession) 530 { 531 if (vsession == NULL) { 532 return NULL; 533 } 534 535 if (vsession->vdev->backend != &vhost_blk_device_backend) { 536 SPDK_ERRLOG("%s: not a vhost-blk device\n", vsession->vdev->name); 537 return NULL; 538 } 539 540 return (struct spdk_vhost_blk_session *)vsession; 541 } 542 543 static struct spdk_vhost_blk_dev * 544 to_blk_dev(struct spdk_vhost_dev *vdev) 545 { 546 if (vdev == NULL) { 547 return NULL; 548 } 549 550 if (vdev->backend != &vhost_blk_device_backend) { 551 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 552 return NULL; 553 } 554 555 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 556 } 557 558 struct spdk_bdev * 559 spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) 560 { 561 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 562 563 assert(bvdev != NULL); 564 return bvdev->bdev; 565 } 566 567 static int 568 _spdk_vhost_session_bdev_remove_cb(struct spdk_vhost_dev *vdev, struct spdk_vhost_session *vsession, 569 void *ctx) 570 { 571 struct spdk_vhost_blk_session *bvsession; 572 573 if (vdev == NULL) { 574 /* Nothing to do */ 575 return 0; 576 } 577 578 if (vsession == NULL) { 579 /* All sessions have been notified, time to close the bdev */ 580 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 581 582 assert(bvdev != NULL); 583 584 spdk_bdev_close(bvdev->bdev_desc); 585 bvdev->bdev_desc = NULL; 586 bvdev->bdev = NULL; 587 return 0; 588 } 589 590 bvsession = (struct spdk_vhost_blk_session *)vsession; 591 if (bvsession->requestq_poller) { 592 spdk_poller_unregister(&bvsession->requestq_poller); 593 bvsession->requestq_poller = spdk_poller_register(no_bdev_vdev_worker, bvsession, 0); 594 } 595 596 return 0; 597 } 598 599 static int 600 _bdev_remove_cb(struct spdk_vhost_dev *vdev, void *arg) 601 { 602 SPDK_WARNLOG("Controller %s: Hot-removing bdev - all further requests will fail.\n", 603 vdev->name); 604 spdk_vhost_dev_foreach_session(vdev, _spdk_vhost_session_bdev_remove_cb, NULL); 605 return 0; 606 } 607 608 static void 609 bdev_remove_cb(void *remove_ctx) 610 { 611 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 612 613 spdk_vhost_call_external_event(bvdev->vdev.name, _bdev_remove_cb, bvdev); 614 } 615 616 static void 617 free_task_pool(struct spdk_vhost_blk_session *bvsession) 618 { 619 struct spdk_vhost_session *vsession = &bvsession->vsession; 620 struct spdk_vhost_virtqueue *vq; 621 uint16_t i; 622 623 for (i = 0; i < vsession->max_queues; i++) { 624 vq = &vsession->virtqueue[i]; 625 if (vq->tasks == NULL) { 626 continue; 627 } 628 629 spdk_dma_free(vq->tasks); 630 vq->tasks = NULL; 631 } 632 } 633 634 static int 635 alloc_task_pool(struct spdk_vhost_blk_session *bvsession) 636 { 637 struct spdk_vhost_session *vsession = &bvsession->vsession; 638 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 639 struct spdk_vhost_virtqueue *vq; 640 struct spdk_vhost_blk_task *task; 641 uint32_t task_cnt; 642 uint16_t i; 643 uint32_t j; 644 645 for (i = 0; i < vsession->max_queues; i++) { 646 vq = &vsession->virtqueue[i]; 647 if (vq->vring.desc == NULL) { 648 continue; 649 } 650 651 task_cnt = vq->vring.size; 652 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 653 /* sanity check */ 654 SPDK_ERRLOG("Controller %s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 655 bvdev->vdev.name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 656 free_task_pool(bvsession); 657 return -1; 658 } 659 vq->tasks = spdk_dma_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 660 SPDK_CACHE_LINE_SIZE, NULL); 661 if (vq->tasks == NULL) { 662 SPDK_ERRLOG("Controller %s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 663 bvdev->vdev.name, task_cnt, i); 664 free_task_pool(bvsession); 665 return -1; 666 } 667 668 for (j = 0; j < task_cnt; j++) { 669 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 670 task->bvsession = bvsession; 671 task->req_idx = j; 672 task->vq = vq; 673 } 674 } 675 676 return 0; 677 } 678 679 static int 680 spdk_vhost_blk_start_cb(struct spdk_vhost_dev *vdev, 681 struct spdk_vhost_session *vsession, void *event_ctx) 682 { 683 struct spdk_vhost_blk_dev *bvdev; 684 struct spdk_vhost_blk_session *bvsession; 685 int i, rc = 0; 686 687 bvsession = to_blk_session(vsession); 688 if (bvsession == NULL) { 689 SPDK_ERRLOG("Trying to start non-blk controller as a blk one.\n"); 690 rc = -1; 691 goto out; 692 } 693 694 bvdev = to_blk_dev(vdev); 695 assert(bvdev != NULL); 696 bvsession->bvdev = bvdev; 697 698 /* validate all I/O queues are in a contiguous index range */ 699 for (i = 0; i < vsession->max_queues; i++) { 700 if (vsession->virtqueue[i].vring.desc == NULL) { 701 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vdev->name, i); 702 rc = -1; 703 goto out; 704 } 705 } 706 707 rc = alloc_task_pool(bvsession); 708 if (rc != 0) { 709 SPDK_ERRLOG("%s: failed to alloc task pool.\n", bvdev->vdev.name); 710 goto out; 711 } 712 713 if (bvdev->bdev) { 714 bvsession->io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 715 if (!bvsession->io_channel) { 716 free_task_pool(bvsession); 717 SPDK_ERRLOG("Controller %s: IO channel allocation failed\n", vdev->name); 718 rc = -1; 719 goto out; 720 } 721 } 722 723 bvsession->requestq_poller = spdk_poller_register(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 724 bvsession, 0); 725 SPDK_INFOLOG(SPDK_LOG_VHOST, "Started poller for vhost controller %s on lcore %d\n", 726 vdev->name, vsession->lcore); 727 out: 728 spdk_vhost_session_event_done(event_ctx, rc); 729 return rc; 730 } 731 732 static int 733 spdk_vhost_blk_start(struct spdk_vhost_session *vsession) 734 { 735 int rc; 736 737 vsession->lcore = spdk_vhost_allocate_reactor(vsession->vdev->cpumask); 738 rc = spdk_vhost_session_send_event(vsession, spdk_vhost_blk_start_cb, 739 3, "start session"); 740 741 if (rc != 0) { 742 spdk_vhost_free_reactor(vsession->lcore); 743 vsession->lcore = -1; 744 } 745 746 return rc; 747 } 748 749 static int 750 destroy_session_poller_cb(void *arg) 751 { 752 struct spdk_vhost_blk_session *bvsession = arg; 753 struct spdk_vhost_session *vsession = &bvsession->vsession; 754 int i; 755 756 if (vsession->task_cnt > 0) { 757 return -1; 758 } 759 760 for (i = 0; i < vsession->max_queues; i++) { 761 vsession->virtqueue[i].next_event_time = 0; 762 spdk_vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 763 } 764 765 SPDK_INFOLOG(SPDK_LOG_VHOST, "Stopping poller for vhost controller %s\n", vsession->vdev->name); 766 767 if (bvsession->io_channel) { 768 spdk_put_io_channel(bvsession->io_channel); 769 bvsession->io_channel = NULL; 770 } 771 772 free_task_pool(bvsession); 773 spdk_poller_unregister(&bvsession->destroy_ctx.poller); 774 spdk_vhost_session_event_done(bvsession->destroy_ctx.event_ctx, 0); 775 776 return -1; 777 } 778 779 static int 780 spdk_vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, 781 struct spdk_vhost_session *vsession, void *event_ctx) 782 { 783 struct spdk_vhost_blk_session *bvsession; 784 785 bvsession = to_blk_session(vsession); 786 if (bvsession == NULL) { 787 SPDK_ERRLOG("Trying to stop non-blk controller as a blk one.\n"); 788 goto err; 789 } 790 791 bvsession->destroy_ctx.event_ctx = event_ctx; 792 spdk_poller_unregister(&bvsession->requestq_poller); 793 bvsession->destroy_ctx.poller = spdk_poller_register(destroy_session_poller_cb, 794 bvsession, 1000); 795 return 0; 796 797 err: 798 spdk_vhost_session_event_done(event_ctx, -1); 799 return -1; 800 } 801 802 static int 803 spdk_vhost_blk_stop(struct spdk_vhost_session *vsession) 804 { 805 int rc; 806 807 rc = spdk_vhost_session_send_event(vsession, spdk_vhost_blk_stop_cb, 808 3, "stop session"); 809 if (rc != 0) { 810 return rc; 811 } 812 813 spdk_vhost_free_reactor(vsession->lcore); 814 vsession->lcore = -1; 815 return 0; 816 } 817 818 static void 819 spdk_vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 820 { 821 struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); 822 struct spdk_vhost_blk_dev *bvdev; 823 824 bvdev = to_blk_dev(vdev); 825 if (bvdev == NULL) { 826 return; 827 } 828 829 assert(bvdev != NULL); 830 spdk_json_write_named_object_begin(w, "block"); 831 832 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 833 834 spdk_json_write_name(w, "bdev"); 835 if (bdev) { 836 spdk_json_write_string(w, spdk_bdev_get_name(bdev)); 837 } else { 838 spdk_json_write_null(w); 839 } 840 841 spdk_json_write_object_end(w); 842 } 843 844 static void 845 spdk_vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 846 { 847 struct spdk_vhost_blk_dev *bvdev; 848 849 bvdev = to_blk_dev(vdev); 850 if (bvdev == NULL) { 851 return; 852 } 853 854 if (!bvdev->bdev) { 855 return; 856 } 857 858 spdk_json_write_object_begin(w); 859 spdk_json_write_named_string(w, "method", "construct_vhost_blk_controller"); 860 861 spdk_json_write_named_object_begin(w, "params"); 862 spdk_json_write_named_string(w, "ctrlr", vdev->name); 863 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 864 spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(vdev->cpumask)); 865 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 866 spdk_json_write_object_end(w); 867 868 spdk_json_write_object_end(w); 869 } 870 871 static int spdk_vhost_blk_destroy(struct spdk_vhost_dev *dev); 872 873 static int 874 spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 875 uint32_t len) 876 { 877 struct virtio_blk_config blkcfg; 878 struct spdk_vhost_blk_dev *bvdev; 879 struct spdk_bdev *bdev; 880 uint32_t blk_size; 881 uint64_t blkcnt; 882 883 bvdev = to_blk_dev(vdev); 884 if (bvdev == NULL) { 885 SPDK_ERRLOG("Trying to get virito_blk configuration failed\n"); 886 return -1; 887 } 888 889 bdev = bvdev->bdev; 890 if (bdev == NULL) { 891 /* We can't just return -1 here as this GET_CONFIG message might 892 * be caused by a QEMU VM reboot. Returning -1 will indicate an 893 * error to QEMU, who might then decide to terminate itself. 894 * We don't want that. A simple reboot shouldn't break the system. 895 * 896 * Presenting a block device with block size 0 and block count 0 897 * doesn't cause any problems on QEMU side and the virtio-pci 898 * device is even still available inside the VM, but there will 899 * be no block device created for it - the kernel drivers will 900 * silently reject it. 901 */ 902 blk_size = 0; 903 blkcnt = 0; 904 } else { 905 blk_size = spdk_bdev_get_block_size(bdev); 906 blkcnt = spdk_bdev_get_num_blocks(bdev); 907 } 908 909 memset(&blkcfg, 0, sizeof(blkcfg)); 910 blkcfg.blk_size = blk_size; 911 /* minimum I/O size in blocks */ 912 blkcfg.min_io_size = 1; 913 /* expressed in 512 Bytes sectors */ 914 blkcfg.capacity = (blkcnt * blk_size) / 512; 915 blkcfg.size_max = 131072; 916 /* -2 for REQ and RESP and -1 for region boundary splitting */ 917 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 918 /* QEMU can overwrite this value when started */ 919 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 920 921 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 922 /* 16MiB, expressed in 512 Bytes */ 923 blkcfg.max_discard_sectors = 32768; 924 blkcfg.max_discard_seg = 1; 925 blkcfg.discard_sector_alignment = blk_size / 512; 926 } 927 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 928 blkcfg.max_write_zeroes_sectors = 32768; 929 blkcfg.max_write_zeroes_seg = 1; 930 } 931 932 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 933 934 return 0; 935 } 936 937 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 938 .virtio_features = SPDK_VHOST_FEATURES | 939 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | 940 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_RO) | 941 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | (1ULL << VIRTIO_BLK_F_TOPOLOGY) | 942 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | 943 (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 944 (1ULL << VIRTIO_BLK_F_MQ) | (1ULL << VIRTIO_BLK_F_DISCARD) | 945 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES), 946 .disabled_features = SPDK_VHOST_DISABLED_FEATURES | (1ULL << VIRTIO_BLK_F_GEOMETRY) | 947 (1ULL << VIRTIO_BLK_F_RO) | (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | 948 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_DISCARD) | 949 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES), 950 .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session), 951 .start_session = spdk_vhost_blk_start, 952 .stop_session = spdk_vhost_blk_stop, 953 .vhost_get_config = spdk_vhost_blk_get_config, 954 .dump_info_json = spdk_vhost_blk_dump_info_json, 955 .write_config_json = spdk_vhost_blk_write_config_json, 956 .remove_device = spdk_vhost_blk_destroy, 957 }; 958 959 int 960 spdk_vhost_blk_controller_construct(void) 961 { 962 struct spdk_conf_section *sp; 963 unsigned ctrlr_num; 964 char *bdev_name; 965 char *cpumask; 966 char *name; 967 bool readonly; 968 969 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 970 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 971 continue; 972 } 973 974 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 975 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 976 spdk_conf_section_get_name(sp)); 977 return -1; 978 } 979 980 name = spdk_conf_section_get_val(sp, "Name"); 981 if (name == NULL) { 982 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 983 return -1; 984 } 985 986 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 987 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 988 989 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 990 if (bdev_name == NULL) { 991 continue; 992 } 993 994 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, readonly) < 0) { 995 return -1; 996 } 997 } 998 999 return 0; 1000 } 1001 1002 int 1003 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, bool readonly) 1004 { 1005 struct spdk_vhost_blk_dev *bvdev = NULL; 1006 struct spdk_bdev *bdev; 1007 uint64_t features = 0; 1008 int ret = 0; 1009 1010 spdk_vhost_lock(); 1011 bdev = spdk_bdev_get_by_name(dev_name); 1012 if (bdev == NULL) { 1013 SPDK_ERRLOG("Controller %s: bdev '%s' not found\n", 1014 name, dev_name); 1015 ret = -ENODEV; 1016 goto out; 1017 } 1018 1019 bvdev = spdk_dma_zmalloc(sizeof(*bvdev), SPDK_CACHE_LINE_SIZE, NULL); 1020 if (bvdev == NULL) { 1021 ret = -ENOMEM; 1022 goto out; 1023 } 1024 1025 ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); 1026 if (ret != 0) { 1027 SPDK_ERRLOG("Controller %s: could not open bdev '%s', error=%d\n", 1028 name, dev_name, ret); 1029 goto out; 1030 } 1031 1032 bvdev->bdev = bdev; 1033 bvdev->readonly = readonly; 1034 ret = spdk_vhost_dev_register(&bvdev->vdev, name, cpumask, &vhost_blk_device_backend); 1035 if (ret != 0) { 1036 spdk_bdev_close(bvdev->bdev_desc); 1037 goto out; 1038 } 1039 1040 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1041 features |= (1ULL << VIRTIO_BLK_F_DISCARD); 1042 } 1043 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1044 features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 1045 } 1046 if (readonly) { 1047 features |= (1ULL << VIRTIO_BLK_F_RO); 1048 } 1049 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { 1050 features |= (1ULL << VIRTIO_BLK_F_FLUSH); 1051 } 1052 1053 if (features && rte_vhost_driver_enable_features(bvdev->vdev.path, features)) { 1054 SPDK_ERRLOG("Controller %s: failed to enable features 0x%"PRIx64"\n", name, features); 1055 1056 if (spdk_vhost_dev_unregister(&bvdev->vdev) != 0) { 1057 SPDK_ERRLOG("Controller %s: failed to remove controller\n", name); 1058 } 1059 1060 spdk_bdev_close(bvdev->bdev_desc); 1061 ret = -1; 1062 goto out; 1063 } 1064 1065 SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: using bdev '%s'\n", name, dev_name); 1066 out: 1067 if (ret != 0 && bvdev) { 1068 spdk_dma_free(bvdev); 1069 } 1070 spdk_vhost_unlock(); 1071 return ret; 1072 } 1073 1074 static int 1075 spdk_vhost_blk_destroy(struct spdk_vhost_dev *vdev) 1076 { 1077 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 1078 int rc; 1079 1080 if (!bvdev) { 1081 return -EINVAL; 1082 } 1083 1084 rc = spdk_vhost_dev_unregister(&bvdev->vdev); 1085 if (rc != 0) { 1086 return rc; 1087 } 1088 1089 if (bvdev->bdev_desc) { 1090 spdk_bdev_close(bvdev->bdev_desc); 1091 bvdev->bdev_desc = NULL; 1092 } 1093 bvdev->bdev = NULL; 1094 1095 spdk_dma_free(bvdev); 1096 return 0; 1097 } 1098 1099 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 1100 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 1101