1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/bdev_module.h" 39 #include "spdk/thread.h" 40 #include "spdk/likely.h" 41 #include "spdk/string.h" 42 #include "spdk/util.h" 43 #include "spdk/vhost.h" 44 45 #include "vhost_internal.h" 46 #include <rte_version.h> 47 48 /* Minimal set of features supported by every SPDK VHOST-BLK device */ 49 #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \ 50 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ 51 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ 52 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \ 53 (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 54 (1ULL << VIRTIO_BLK_F_MQ)) 55 56 /* Not supported features */ 57 #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \ 58 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 59 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI)) 60 61 /* Vhost-blk support protocol features */ 62 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \ 63 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) 64 65 struct spdk_vhost_blk_task { 66 struct spdk_bdev_io *bdev_io; 67 struct spdk_vhost_blk_session *bvsession; 68 struct spdk_vhost_virtqueue *vq; 69 70 volatile uint8_t *status; 71 72 uint16_t req_idx; 73 uint16_t num_descs; 74 uint16_t buffer_id; 75 76 /* for io wait */ 77 struct spdk_bdev_io_wait_entry bdev_io_wait; 78 79 /* If set, the task is currently used for I/O processing. */ 80 bool used; 81 82 /** Number of bytes that were written. */ 83 uint32_t used_len; 84 uint16_t iovcnt; 85 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 86 }; 87 88 struct spdk_vhost_blk_dev { 89 struct spdk_vhost_dev vdev; 90 struct spdk_bdev *bdev; 91 struct spdk_bdev_desc *bdev_desc; 92 /* dummy_io_channel is used to hold a bdev reference */ 93 struct spdk_io_channel *dummy_io_channel; 94 bool readonly; 95 }; 96 97 struct spdk_vhost_blk_session { 98 /* The parent session must be the very first field in this struct */ 99 struct spdk_vhost_session vsession; 100 struct spdk_vhost_blk_dev *bvdev; 101 struct spdk_poller *requestq_poller; 102 struct spdk_io_channel *io_channel; 103 struct spdk_poller *stop_poller; 104 }; 105 106 /* forward declaration */ 107 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 108 109 static int 110 process_blk_request(struct spdk_vhost_blk_task *task, 111 struct spdk_vhost_blk_session *bvsession, 112 struct spdk_vhost_virtqueue *vq); 113 114 static struct spdk_vhost_blk_session * 115 to_blk_session(struct spdk_vhost_session *vsession) 116 { 117 assert(vsession->vdev->backend == &vhost_blk_device_backend); 118 return (struct spdk_vhost_blk_session *)vsession; 119 } 120 121 static void 122 blk_task_finish(struct spdk_vhost_blk_task *task) 123 { 124 assert(task->bvsession->vsession.task_cnt > 0); 125 task->bvsession->vsession.task_cnt--; 126 task->used = false; 127 } 128 129 static void 130 blk_task_init(struct spdk_vhost_blk_task *task) 131 { 132 task->used = true; 133 task->iovcnt = SPDK_COUNTOF(task->iovs); 134 task->status = NULL; 135 task->used_len = 0; 136 } 137 138 static void 139 blk_task_enqueue(struct spdk_vhost_blk_task *task) 140 { 141 if (task->vq->packed.packed_ring) { 142 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq, 143 task->num_descs, 144 task->buffer_id, task->used_len); 145 } else { 146 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, 147 task->req_idx, task->used_len); 148 } 149 } 150 151 static void 152 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 153 { 154 if (task->status) { 155 *task->status = status; 156 } 157 158 blk_task_enqueue(task); 159 blk_task_finish(task); 160 SPDK_DEBUGLOG(vhost_blk_data, "Invalid request (status=%" PRIu8")\n", status); 161 } 162 163 /* 164 * Process task's descriptor chain and setup data related fields. 165 * Return 166 * total size of suplied buffers 167 * 168 * FIXME: Make this function return to rd_cnt and wr_cnt 169 */ 170 static int 171 blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession, 172 struct spdk_vhost_virtqueue *vq, 173 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 174 { 175 struct spdk_vhost_session *vsession = &bvsession->vsession; 176 struct spdk_vhost_dev *vdev = vsession->vdev; 177 struct vring_desc *desc, *desc_table; 178 uint16_t out_cnt = 0, cnt = 0; 179 uint32_t desc_table_size, len = 0; 180 uint32_t desc_handled_cnt; 181 int rc; 182 183 rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size); 184 if (rc != 0) { 185 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 186 return -1; 187 } 188 189 desc_handled_cnt = 0; 190 while (1) { 191 /* 192 * Maximum cnt reached? 193 * Should not happen if request is well formatted, otherwise this is a BUG. 194 */ 195 if (spdk_unlikely(cnt == *iovs_cnt)) { 196 SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 197 vsession->name, req_idx); 198 return -1; 199 } 200 201 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) { 202 SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 203 vsession->name, req_idx, cnt); 204 return -1; 205 } 206 207 len += desc->len; 208 209 out_cnt += vhost_vring_desc_is_wr(desc); 210 211 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 212 if (rc != 0) { 213 SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 214 vsession->name, req_idx); 215 return -1; 216 } else if (desc == NULL) { 217 break; 218 } 219 220 desc_handled_cnt++; 221 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { 222 /* Break a cycle and report an error, if any. */ 223 SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n", 224 vsession->name, desc_table_size, desc_handled_cnt); 225 return -1; 226 } 227 } 228 229 /* 230 * There must be least two descriptors. 231 * First contain request so it must be readable. 232 * Last descriptor contain buffer for response so it must be writable. 233 */ 234 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 235 return -1; 236 } 237 238 *length = len; 239 *iovs_cnt = cnt; 240 return 0; 241 } 242 243 static int 244 blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession, 245 struct spdk_vhost_virtqueue *vq, 246 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 247 { 248 struct spdk_vhost_session *vsession = &bvsession->vsession; 249 struct spdk_vhost_dev *vdev = vsession->vdev; 250 struct vring_packed_desc *desc = NULL, *desc_table; 251 uint16_t out_cnt = 0, cnt = 0; 252 uint32_t desc_table_size, len = 0; 253 int rc = 0; 254 255 rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc, 256 &desc_table, &desc_table_size); 257 if (spdk_unlikely(rc != 0)) { 258 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 259 return rc; 260 } 261 262 if (desc_table != NULL) { 263 req_idx = 0; 264 } 265 266 while (1) { 267 /* 268 * Maximum cnt reached? 269 * Should not happen if request is well formatted, otherwise this is a BUG. 270 */ 271 if (spdk_unlikely(cnt == *iovs_cnt)) { 272 SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 273 vsession->name, req_idx); 274 return -EINVAL; 275 } 276 277 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) { 278 SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 279 vsession->name, req_idx, cnt); 280 return -EINVAL; 281 } 282 283 len += desc->len; 284 out_cnt += vhost_vring_packed_desc_is_wr(desc); 285 286 /* desc is NULL means we reach the last desc of this request */ 287 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size); 288 if (desc == NULL) { 289 break; 290 } 291 } 292 293 /* 294 * There must be least two descriptors. 295 * First contain request so it must be readable. 296 * Last descriptor contain buffer for response so it must be writable. 297 */ 298 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 299 return -EINVAL; 300 } 301 302 *length = len; 303 *iovs_cnt = cnt; 304 305 return 0; 306 } 307 308 static void 309 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 310 { 311 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 312 313 blk_task_enqueue(task); 314 315 SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d\n status: %s\n", task, 316 task->req_idx, success ? "OK" : "FAIL"); 317 blk_task_finish(task); 318 } 319 320 static void 321 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 322 { 323 struct spdk_vhost_blk_task *task = cb_arg; 324 325 spdk_bdev_free_io(bdev_io); 326 blk_request_finish(success, task); 327 } 328 329 static void 330 blk_request_resubmit(void *arg) 331 { 332 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 333 int rc = 0; 334 335 blk_task_init(task); 336 337 rc = process_blk_request(task, task->bvsession, task->vq); 338 if (rc == 0) { 339 SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task); 340 } else { 341 SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task); 342 } 343 } 344 345 static inline void 346 blk_request_queue_io(struct spdk_vhost_blk_task *task) 347 { 348 int rc; 349 struct spdk_vhost_blk_session *bvsession = task->bvsession; 350 struct spdk_bdev *bdev = bvsession->bvdev->bdev; 351 352 task->bdev_io_wait.bdev = bdev; 353 task->bdev_io_wait.cb_fn = blk_request_resubmit; 354 task->bdev_io_wait.cb_arg = task; 355 356 rc = spdk_bdev_queue_io_wait(bdev, bvsession->io_channel, &task->bdev_io_wait); 357 if (rc != 0) { 358 SPDK_ERRLOG("%s: failed to queue I/O, rc=%d\n", bvsession->vsession.name, rc); 359 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 360 } 361 } 362 363 static int 364 process_blk_request(struct spdk_vhost_blk_task *task, 365 struct spdk_vhost_blk_session *bvsession, 366 struct spdk_vhost_virtqueue *vq) 367 { 368 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 369 const struct virtio_blk_outhdr *req; 370 struct virtio_blk_discard_write_zeroes *desc; 371 struct iovec *iov; 372 uint32_t type; 373 uint32_t payload_len; 374 uint64_t flush_bytes; 375 int rc; 376 377 if (vq->packed.packed_ring) { 378 rc = blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 379 &payload_len); 380 } else { 381 rc = blk_iovs_split_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 382 &payload_len); 383 } 384 385 if (rc) { 386 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 387 /* Only READ and WRITE are supported for now. */ 388 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 389 return -1; 390 } 391 392 iov = &task->iovs[0]; 393 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 394 SPDK_DEBUGLOG(vhost_blk, 395 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 396 iov->iov_len, sizeof(*req), task->req_idx); 397 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 398 return -1; 399 } 400 401 req = iov->iov_base; 402 403 iov = &task->iovs[task->iovcnt - 1]; 404 if (spdk_unlikely(iov->iov_len != 1)) { 405 SPDK_DEBUGLOG(vhost_blk, 406 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 407 iov->iov_len, 1, task->req_idx); 408 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 409 return -1; 410 } 411 412 task->status = iov->iov_base; 413 payload_len -= sizeof(*req) + sizeof(*task->status); 414 task->iovcnt -= 2; 415 416 type = req->type; 417 #ifdef VIRTIO_BLK_T_BARRIER 418 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 419 type &= ~VIRTIO_BLK_T_BARRIER; 420 #endif 421 422 switch (type) { 423 case VIRTIO_BLK_T_IN: 424 case VIRTIO_BLK_T_OUT: 425 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { 426 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 427 type ? "WRITE" : "READ", task->req_idx); 428 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 429 return -1; 430 } 431 432 if (type == VIRTIO_BLK_T_IN) { 433 task->used_len = payload_len + sizeof(*task->status); 434 rc = spdk_bdev_readv(bvdev->bdev_desc, bvsession->io_channel, 435 &task->iovs[1], task->iovcnt, req->sector * 512, 436 payload_len, blk_request_complete_cb, task); 437 } else if (!bvdev->readonly) { 438 task->used_len = sizeof(*task->status); 439 rc = spdk_bdev_writev(bvdev->bdev_desc, bvsession->io_channel, 440 &task->iovs[1], task->iovcnt, req->sector * 512, 441 payload_len, blk_request_complete_cb, task); 442 } else { 443 SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n"); 444 rc = -1; 445 } 446 447 if (rc) { 448 if (rc == -ENOMEM) { 449 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n"); 450 blk_request_queue_io(task); 451 } else { 452 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 453 return -1; 454 } 455 } 456 break; 457 case VIRTIO_BLK_T_DISCARD: 458 desc = task->iovs[1].iov_base; 459 if (payload_len != sizeof(*desc)) { 460 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 461 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 462 return -1; 463 } 464 465 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvsession->io_channel, 466 desc->sector * 512, desc->num_sectors * 512, 467 blk_request_complete_cb, task); 468 if (rc) { 469 if (rc == -ENOMEM) { 470 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n"); 471 blk_request_queue_io(task); 472 } else { 473 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 474 return -1; 475 } 476 } 477 break; 478 case VIRTIO_BLK_T_WRITE_ZEROES: 479 desc = task->iovs[1].iov_base; 480 if (payload_len != sizeof(*desc)) { 481 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 482 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 483 return -1; 484 } 485 486 /* Zeroed and Unmap the range, SPDK doen't support it. */ 487 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 488 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 489 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 490 return -1; 491 } 492 493 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvsession->io_channel, 494 desc->sector * 512, desc->num_sectors * 512, 495 blk_request_complete_cb, task); 496 if (rc) { 497 if (rc == -ENOMEM) { 498 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n"); 499 blk_request_queue_io(task); 500 } else { 501 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 502 return -1; 503 } 504 } 505 break; 506 case VIRTIO_BLK_T_FLUSH: 507 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev); 508 if (req->sector != 0) { 509 SPDK_NOTICELOG("sector must be zero for flush command\n"); 510 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 511 return -1; 512 } 513 rc = spdk_bdev_flush(bvdev->bdev_desc, bvsession->io_channel, 514 0, flush_bytes, 515 blk_request_complete_cb, task); 516 if (rc) { 517 if (rc == -ENOMEM) { 518 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n"); 519 blk_request_queue_io(task); 520 } else { 521 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 522 return -1; 523 } 524 } 525 break; 526 case VIRTIO_BLK_T_GET_ID: 527 if (!task->iovcnt || !payload_len) { 528 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 529 return -1; 530 } 531 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 532 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 533 task->used_len, ' '); 534 blk_request_finish(true, task); 535 break; 536 default: 537 SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type); 538 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 539 return -1; 540 } 541 542 return 0; 543 } 544 545 static void 546 process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx) 547 { 548 struct spdk_vhost_blk_task *task; 549 uint16_t task_idx = req_idx, num_descs; 550 551 if (vq->packed.packed_ring) { 552 /* Packed ring used the buffer_id as the task_idx to get task struct. 553 * In kernel driver, it uses the vq->free_head to set the buffer_id so the value 554 * must be in the range of 0 ~ vring.size. The free_head value must be unique 555 * in the outstanding requests. 556 * We can't use the req_idx as the task_idx because the desc can be reused in 557 * the next phase even when it's not completed in the previous phase. For example, 558 * At phase 0, last_used_idx was 2 and desc0 was not completed.Then after moving 559 * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used 560 * as task_idx because we will know task[0]->used is true at phase 1. 561 * The split queue is quite different, the desc would insert into the free list when 562 * device completes the request, the driver gets the desc from the free list which 563 * ensures the req_idx is unique in the outstanding requests. 564 */ 565 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 566 } 567 568 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 569 if (spdk_unlikely(task->used)) { 570 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 571 task->bvsession->vsession.name, task_idx); 572 task->used_len = 0; 573 blk_task_enqueue(task); 574 return; 575 } 576 577 if (vq->packed.packed_ring) { 578 task->req_idx = req_idx; 579 task->num_descs = num_descs; 580 task->buffer_id = task_idx; 581 } 582 583 task->bvsession->vsession.task_cnt++; 584 585 blk_task_init(task); 586 587 if (process_blk_request(task, task->bvsession, vq) == 0) { 588 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task, 589 task_idx); 590 } else { 591 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx); 592 } 593 } 594 595 static void 596 submit_inflight_desc(struct spdk_vhost_blk_session *bvsession, 597 struct spdk_vhost_virtqueue *vq) 598 { 599 struct spdk_vhost_session *vsession = &bvsession->vsession; 600 spdk_vhost_resubmit_info *resubmit = vq->vring_inflight.resubmit_inflight; 601 spdk_vhost_resubmit_desc *resubmit_list; 602 uint16_t req_idx; 603 604 if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL)) { 605 return; 606 } 607 608 resubmit_list = resubmit->resubmit_list; 609 while (resubmit->resubmit_num-- > 0) { 610 req_idx = resubmit_list[resubmit->resubmit_num].index; 611 SPDK_DEBUGLOG(vhost_blk, "====== Start processing request idx %"PRIu16"======\n", 612 req_idx); 613 614 if (spdk_unlikely(req_idx >= vq->vring.size)) { 615 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 616 vsession->name, req_idx, vq->vring.size); 617 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 618 continue; 619 } 620 621 process_blk_task(vq, req_idx); 622 } 623 624 free(resubmit_list); 625 resubmit->resubmit_list = NULL; 626 } 627 628 static void 629 process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 630 { 631 struct spdk_vhost_session *vsession = &bvsession->vsession; 632 uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS]; 633 uint16_t reqs_cnt, i; 634 635 submit_inflight_desc(bvsession, vq); 636 637 reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 638 if (!reqs_cnt) { 639 return; 640 } 641 642 for (i = 0; i < reqs_cnt; i++) { 643 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n", 644 reqs[i]); 645 646 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 647 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 648 vsession->name, reqs[i], vq->vring.size); 649 vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 650 continue; 651 } 652 653 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]); 654 655 process_blk_task(vq, reqs[i]); 656 } 657 } 658 659 static void 660 process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 661 { 662 uint16_t i = 0; 663 664 while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS && 665 vhost_vq_packed_ring_is_avail(vq)) { 666 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n", 667 vq->last_avail_idx); 668 669 process_blk_task(vq, vq->last_avail_idx); 670 } 671 } 672 673 static int 674 _vdev_vq_worker(struct spdk_vhost_virtqueue *vq) 675 { 676 struct spdk_vhost_session *vsession = vq->vsession; 677 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 678 bool packed_ring; 679 680 packed_ring = vq->packed.packed_ring; 681 if (packed_ring) { 682 process_packed_vq(bvsession, vq); 683 } else { 684 process_vq(bvsession, vq); 685 } 686 687 vhost_session_vq_used_signal(vq); 688 689 return SPDK_POLLER_BUSY; 690 691 } 692 693 static int 694 vdev_vq_worker(void *arg) 695 { 696 struct spdk_vhost_virtqueue *vq = arg; 697 698 return _vdev_vq_worker(vq); 699 } 700 701 static int 702 vdev_worker(void *arg) 703 { 704 struct spdk_vhost_blk_session *bvsession = arg; 705 struct spdk_vhost_session *vsession = &bvsession->vsession; 706 uint16_t q_idx; 707 708 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 709 _vdev_vq_worker(&vsession->virtqueue[q_idx]); 710 } 711 712 return SPDK_POLLER_BUSY; 713 } 714 715 static void 716 no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 717 { 718 struct spdk_vhost_session *vsession = &bvsession->vsession; 719 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 720 uint32_t length; 721 uint16_t iovcnt, req_idx; 722 723 if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 724 return; 725 } 726 727 iovcnt = SPDK_COUNTOF(iovs); 728 if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) { 729 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 730 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx); 731 } 732 733 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 734 } 735 736 static void 737 no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 738 { 739 struct spdk_vhost_session *vsession = &bvsession->vsession; 740 struct spdk_vhost_blk_task *task; 741 uint32_t length; 742 uint16_t req_idx = vq->last_avail_idx; 743 uint16_t task_idx, num_descs; 744 745 if (!vhost_vq_packed_ring_is_avail(vq)) { 746 return; 747 } 748 749 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 750 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 751 if (spdk_unlikely(task->used)) { 752 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 753 vsession->name, req_idx); 754 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 755 task->buffer_id, task->used_len); 756 return; 757 } 758 759 task->req_idx = req_idx; 760 task->num_descs = num_descs; 761 task->buffer_id = task_idx; 762 blk_task_init(task); 763 764 if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 765 &length)) { 766 *(volatile uint8_t *)(task->iovs[task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR; 767 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx); 768 } 769 770 task->used = false; 771 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 772 task->buffer_id, task->used_len); 773 } 774 775 static int 776 _no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq) 777 { 778 struct spdk_vhost_session *vsession = vq->vsession; 779 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 780 bool packed_ring; 781 782 packed_ring = vq->packed.packed_ring; 783 if (packed_ring) { 784 no_bdev_process_packed_vq(bvsession, vq); 785 } else { 786 no_bdev_process_vq(bvsession, vq); 787 } 788 789 vhost_session_vq_used_signal(vq); 790 791 if (vsession->task_cnt == 0 && bvsession->io_channel) { 792 spdk_put_io_channel(bvsession->io_channel); 793 bvsession->io_channel = NULL; 794 } 795 796 return SPDK_POLLER_BUSY; 797 } 798 799 static int 800 no_bdev_vdev_vq_worker(void *arg) 801 { 802 struct spdk_vhost_virtqueue *vq = arg; 803 804 return _no_bdev_vdev_vq_worker(vq); 805 } 806 807 static int 808 no_bdev_vdev_worker(void *arg) 809 { 810 struct spdk_vhost_blk_session *bvsession = arg; 811 struct spdk_vhost_session *vsession = &bvsession->vsession; 812 uint16_t q_idx; 813 814 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 815 _no_bdev_vdev_vq_worker(&vsession->virtqueue[q_idx]); 816 } 817 818 return SPDK_POLLER_BUSY; 819 } 820 821 static void 822 vhost_blk_session_unregister_interrupts(struct spdk_vhost_blk_session *bvsession) 823 { 824 struct spdk_vhost_session *vsession = &bvsession->vsession; 825 struct spdk_vhost_virtqueue *vq; 826 int i; 827 828 SPDK_DEBUGLOG(vhost_blk, "unregister virtqueues interrupt\n"); 829 for (i = 0; i < vsession->max_queues; i++) { 830 vq = &vsession->virtqueue[i]; 831 if (vq->intr == NULL) { 832 break; 833 } 834 835 SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n", 836 i, vq->vring.kickfd); 837 spdk_interrupt_unregister(&vq->intr); 838 } 839 } 840 841 static int 842 vhost_blk_session_register_interrupts(struct spdk_vhost_blk_session *bvsession, 843 spdk_interrupt_fn fn) 844 { 845 struct spdk_vhost_session *vsession = &bvsession->vsession; 846 struct spdk_vhost_virtqueue *vq = NULL; 847 int i; 848 849 SPDK_DEBUGLOG(vhost_blk, "Register virtqueues interrupt\n"); 850 for (i = 0; i < vsession->max_queues; i++) { 851 vq = &vsession->virtqueue[i]; 852 SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n", 853 i, vq->vring.kickfd); 854 855 vq->intr = SPDK_INTERRUPT_REGISTER(vq->vring.kickfd, fn, vq); 856 if (vq->intr == NULL) { 857 SPDK_ERRLOG("Fail to register req notifier handler.\n"); 858 goto err; 859 } 860 } 861 862 return 0; 863 864 err: 865 vhost_blk_session_unregister_interrupts(bvsession); 866 867 return -1; 868 } 869 870 static struct spdk_vhost_blk_dev * 871 to_blk_dev(struct spdk_vhost_dev *vdev) 872 { 873 if (vdev == NULL) { 874 return NULL; 875 } 876 877 if (vdev->backend != &vhost_blk_device_backend) { 878 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 879 return NULL; 880 } 881 882 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 883 } 884 885 static int 886 vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev, 887 struct spdk_vhost_session *vsession, 888 void *ctx) 889 { 890 #if RTE_VERSION >= RTE_VERSION_NUM(20, 02, 0, 0) 891 SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid); 892 rte_vhost_slave_config_change(vsession->vid, false); 893 #else 894 SPDK_NOTICELOG("bdev does not support resize until DPDK submodule version >= 20.02\n"); 895 #endif 896 897 return 0; 898 } 899 900 static void 901 blk_resize_cb(void *resize_ctx) 902 { 903 struct spdk_vhost_blk_dev *bvdev = resize_ctx; 904 905 spdk_vhost_lock(); 906 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_resize_cb, 907 NULL, NULL); 908 spdk_vhost_unlock(); 909 } 910 911 static void 912 vhost_dev_bdev_remove_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx) 913 { 914 915 /* All sessions have been notified, time to close the bdev */ 916 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 917 918 assert(bvdev != NULL); 919 spdk_put_io_channel(bvdev->dummy_io_channel); 920 spdk_bdev_close(bvdev->bdev_desc); 921 bvdev->bdev_desc = NULL; 922 bvdev->bdev = NULL; 923 } 924 925 static int 926 vhost_session_bdev_remove_cb(struct spdk_vhost_dev *vdev, 927 struct spdk_vhost_session *vsession, 928 void *ctx) 929 { 930 struct spdk_vhost_blk_session *bvsession; 931 int rc; 932 933 bvsession = (struct spdk_vhost_blk_session *)vsession; 934 if (bvsession->requestq_poller) { 935 spdk_poller_unregister(&bvsession->requestq_poller); 936 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0); 937 } 938 939 if (vsession->virtqueue[0].intr) { 940 vhost_blk_session_unregister_interrupts(bvsession); 941 rc = vhost_blk_session_register_interrupts(bvsession, no_bdev_vdev_vq_worker); 942 if (rc) { 943 SPDK_ERRLOG("%s: Interrupt register failed\n", vsession->name); 944 return -1; 945 } 946 947 } 948 949 return 0; 950 } 951 952 static void 953 bdev_remove_cb(void *remove_ctx) 954 { 955 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 956 957 SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n", 958 bvdev->vdev.name); 959 960 spdk_vhost_lock(); 961 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_remove_cb, 962 vhost_dev_bdev_remove_cpl_cb, NULL); 963 spdk_vhost_unlock(); 964 } 965 966 static void 967 bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 968 void *event_ctx) 969 { 970 SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n", 971 type, 972 bdev->name); 973 974 switch (type) { 975 case SPDK_BDEV_EVENT_REMOVE: 976 SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_REMOVE)\n", bdev->name); 977 bdev_remove_cb(event_ctx); 978 break; 979 case SPDK_BDEV_EVENT_RESIZE: 980 SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_RESIZE)\n", bdev->name); 981 blk_resize_cb(event_ctx); 982 break; 983 default: 984 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 985 break; 986 } 987 } 988 989 static void 990 free_task_pool(struct spdk_vhost_blk_session *bvsession) 991 { 992 struct spdk_vhost_session *vsession = &bvsession->vsession; 993 struct spdk_vhost_virtqueue *vq; 994 uint16_t i; 995 996 for (i = 0; i < vsession->max_queues; i++) { 997 vq = &vsession->virtqueue[i]; 998 if (vq->tasks == NULL) { 999 continue; 1000 } 1001 1002 spdk_free(vq->tasks); 1003 vq->tasks = NULL; 1004 } 1005 } 1006 1007 static int 1008 alloc_task_pool(struct spdk_vhost_blk_session *bvsession) 1009 { 1010 struct spdk_vhost_session *vsession = &bvsession->vsession; 1011 struct spdk_vhost_virtqueue *vq; 1012 struct spdk_vhost_blk_task *task; 1013 uint32_t task_cnt; 1014 uint16_t i; 1015 uint32_t j; 1016 1017 for (i = 0; i < vsession->max_queues; i++) { 1018 vq = &vsession->virtqueue[i]; 1019 if (vq->vring.desc == NULL) { 1020 continue; 1021 } 1022 1023 task_cnt = vq->vring.size; 1024 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 1025 /* sanity check */ 1026 SPDK_ERRLOG("%s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 1027 vsession->name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 1028 free_task_pool(bvsession); 1029 return -1; 1030 } 1031 vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 1032 SPDK_CACHE_LINE_SIZE, NULL, 1033 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 1034 if (vq->tasks == NULL) { 1035 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 1036 vsession->name, task_cnt, i); 1037 free_task_pool(bvsession); 1038 return -1; 1039 } 1040 1041 for (j = 0; j < task_cnt; j++) { 1042 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 1043 task->bvsession = bvsession; 1044 task->req_idx = j; 1045 task->vq = vq; 1046 } 1047 } 1048 1049 return 0; 1050 } 1051 1052 static int 1053 vhost_blk_start_cb(struct spdk_vhost_dev *vdev, 1054 struct spdk_vhost_session *vsession, void *unused) 1055 { 1056 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 1057 struct spdk_vhost_blk_dev *bvdev; 1058 int i, rc = 0; 1059 1060 bvdev = to_blk_dev(vdev); 1061 assert(bvdev != NULL); 1062 bvsession->bvdev = bvdev; 1063 1064 /* validate all I/O queues are in a contiguous index range */ 1065 for (i = 0; i < vsession->max_queues; i++) { 1066 /* vring.desc and vring.desc_packed are in a union struct 1067 * so q->vring.desc can replace q->vring.desc_packed. 1068 */ 1069 if (vsession->virtqueue[i].vring.desc == NULL) { 1070 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i); 1071 rc = -1; 1072 goto out; 1073 } 1074 } 1075 1076 rc = alloc_task_pool(bvsession); 1077 if (rc != 0) { 1078 SPDK_ERRLOG("%s: failed to alloc task pool.\n", vsession->name); 1079 goto out; 1080 } 1081 1082 if (bvdev->bdev) { 1083 bvsession->io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 1084 if (!bvsession->io_channel) { 1085 free_task_pool(bvsession); 1086 SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name); 1087 rc = -1; 1088 goto out; 1089 } 1090 } 1091 1092 if (spdk_interrupt_mode_is_enabled()) { 1093 rc = vhost_blk_session_register_interrupts(bvsession, 1094 bvdev->bdev ? vdev_vq_worker : no_bdev_vdev_vq_worker); 1095 if (rc) { 1096 SPDK_ERRLOG("%s: Interrupt register failed\n", vsession->name); 1097 goto out; 1098 } 1099 SPDK_INFOLOG(vhost, "%s: started interrupt source on lcore %d\n", 1100 vsession->name, spdk_env_get_current_core()); 1101 } else { 1102 bvsession->requestq_poller = SPDK_POLLER_REGISTER(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 1103 bvsession, 0); 1104 SPDK_INFOLOG(vhost, "%s: started poller on lcore %d\n", 1105 vsession->name, spdk_env_get_current_core()); 1106 } 1107 1108 out: 1109 vhost_session_start_done(vsession, rc); 1110 return rc; 1111 } 1112 1113 static int 1114 vhost_blk_start(struct spdk_vhost_session *vsession) 1115 { 1116 return vhost_session_send_event(vsession, vhost_blk_start_cb, 1117 3, "start session"); 1118 } 1119 1120 static int 1121 destroy_session_poller_cb(void *arg) 1122 { 1123 struct spdk_vhost_blk_session *bvsession = arg; 1124 struct spdk_vhost_session *vsession = &bvsession->vsession; 1125 int i; 1126 1127 if (vsession->task_cnt > 0) { 1128 return SPDK_POLLER_BUSY; 1129 } 1130 1131 if (spdk_vhost_trylock() != 0) { 1132 return SPDK_POLLER_BUSY; 1133 } 1134 1135 for (i = 0; i < vsession->max_queues; i++) { 1136 vsession->virtqueue[i].next_event_time = 0; 1137 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 1138 } 1139 1140 SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n", 1141 vsession->name, spdk_env_get_current_core()); 1142 1143 if (bvsession->io_channel) { 1144 spdk_put_io_channel(bvsession->io_channel); 1145 bvsession->io_channel = NULL; 1146 } 1147 1148 free_task_pool(bvsession); 1149 spdk_poller_unregister(&bvsession->stop_poller); 1150 vhost_session_stop_done(vsession, 0); 1151 1152 spdk_vhost_unlock(); 1153 return SPDK_POLLER_BUSY; 1154 } 1155 1156 static int 1157 vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, 1158 struct spdk_vhost_session *vsession, void *unused) 1159 { 1160 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 1161 1162 spdk_poller_unregister(&bvsession->requestq_poller); 1163 1164 if (vsession->virtqueue[0].intr) { 1165 vhost_blk_session_unregister_interrupts(bvsession); 1166 } 1167 1168 bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb, 1169 bvsession, 1000); 1170 return 0; 1171 } 1172 1173 static int 1174 vhost_blk_stop(struct spdk_vhost_session *vsession) 1175 { 1176 return vhost_session_send_event(vsession, vhost_blk_stop_cb, 1177 3, "stop session"); 1178 } 1179 1180 static void 1181 vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1182 { 1183 struct spdk_vhost_blk_dev *bvdev; 1184 1185 bvdev = to_blk_dev(vdev); 1186 assert(bvdev != NULL); 1187 1188 spdk_json_write_named_object_begin(w, "block"); 1189 1190 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1191 1192 spdk_json_write_name(w, "bdev"); 1193 if (bvdev->bdev) { 1194 spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev)); 1195 } else { 1196 spdk_json_write_null(w); 1197 } 1198 1199 spdk_json_write_object_end(w); 1200 } 1201 1202 static void 1203 vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1204 { 1205 struct spdk_vhost_blk_dev *bvdev; 1206 1207 bvdev = to_blk_dev(vdev); 1208 assert(bvdev != NULL); 1209 1210 if (!bvdev->bdev) { 1211 return; 1212 } 1213 1214 spdk_json_write_object_begin(w); 1215 spdk_json_write_named_string(w, "method", "vhost_create_blk_controller"); 1216 1217 spdk_json_write_named_object_begin(w, "params"); 1218 spdk_json_write_named_string(w, "ctrlr", vdev->name); 1219 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 1220 spdk_json_write_named_string(w, "cpumask", 1221 spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread))); 1222 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1223 spdk_json_write_object_end(w); 1224 1225 spdk_json_write_object_end(w); 1226 } 1227 1228 static int vhost_blk_destroy(struct spdk_vhost_dev *dev); 1229 1230 static int 1231 vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 1232 uint32_t len) 1233 { 1234 struct virtio_blk_config blkcfg; 1235 struct spdk_vhost_blk_dev *bvdev; 1236 struct spdk_bdev *bdev; 1237 uint32_t blk_size; 1238 uint64_t blkcnt; 1239 1240 memset(&blkcfg, 0, sizeof(blkcfg)); 1241 bvdev = to_blk_dev(vdev); 1242 assert(bvdev != NULL); 1243 bdev = bvdev->bdev; 1244 if (bdev == NULL) { 1245 /* We can't just return -1 here as this GET_CONFIG message might 1246 * be caused by a QEMU VM reboot. Returning -1 will indicate an 1247 * error to QEMU, who might then decide to terminate itself. 1248 * We don't want that. A simple reboot shouldn't break the system. 1249 * 1250 * Presenting a block device with block size 0 and block count 0 1251 * doesn't cause any problems on QEMU side and the virtio-pci 1252 * device is even still available inside the VM, but there will 1253 * be no block device created for it - the kernel drivers will 1254 * silently reject it. 1255 */ 1256 blk_size = 0; 1257 blkcnt = 0; 1258 } else { 1259 blk_size = spdk_bdev_get_block_size(bdev); 1260 blkcnt = spdk_bdev_get_num_blocks(bdev); 1261 if (spdk_bdev_get_buf_align(bdev) > 1) { 1262 blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE; 1263 blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, BDEV_IO_NUM_CHILD_IOV - 2 - 1); 1264 } else { 1265 blkcfg.size_max = 131072; 1266 /* -2 for REQ and RESP and -1 for region boundary splitting */ 1267 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 1268 } 1269 } 1270 1271 blkcfg.blk_size = blk_size; 1272 /* minimum I/O size in blocks */ 1273 blkcfg.min_io_size = 1; 1274 /* expressed in 512 Bytes sectors */ 1275 blkcfg.capacity = (blkcnt * blk_size) / 512; 1276 /* QEMU can overwrite this value when started */ 1277 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 1278 1279 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1280 /* 16MiB, expressed in 512 Bytes */ 1281 blkcfg.max_discard_sectors = 32768; 1282 blkcfg.max_discard_seg = 1; 1283 blkcfg.discard_sector_alignment = blk_size / 512; 1284 } 1285 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1286 blkcfg.max_write_zeroes_sectors = 32768; 1287 blkcfg.max_write_zeroes_seg = 1; 1288 } 1289 1290 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 1291 1292 return 0; 1293 } 1294 1295 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 1296 .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session), 1297 .start_session = vhost_blk_start, 1298 .stop_session = vhost_blk_stop, 1299 .vhost_get_config = vhost_blk_get_config, 1300 .dump_info_json = vhost_blk_dump_info_json, 1301 .write_config_json = vhost_blk_write_config_json, 1302 .remove_device = vhost_blk_destroy, 1303 }; 1304 1305 int 1306 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, 1307 bool readonly, bool packed_ring) 1308 { 1309 struct spdk_vhost_blk_dev *bvdev = NULL; 1310 struct spdk_vhost_dev *vdev; 1311 struct spdk_bdev *bdev; 1312 int ret = 0; 1313 1314 spdk_vhost_lock(); 1315 1316 bvdev = calloc(1, sizeof(*bvdev)); 1317 if (bvdev == NULL) { 1318 ret = -ENOMEM; 1319 goto out; 1320 } 1321 1322 ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc); 1323 if (ret != 0) { 1324 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n", 1325 name, dev_name, ret); 1326 goto out; 1327 } 1328 bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc); 1329 1330 vdev = &bvdev->vdev; 1331 vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE; 1332 vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES; 1333 vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES; 1334 1335 vdev->virtio_features |= (uint64_t)packed_ring << VIRTIO_F_RING_PACKED; 1336 1337 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1338 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD); 1339 } 1340 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1341 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 1342 } 1343 if (readonly) { 1344 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO); 1345 } 1346 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { 1347 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH); 1348 } 1349 1350 /* 1351 * When starting qemu with vhost-user-blk multiqueue, the vhost device will 1352 * be started/stopped many times, related to the queues num, as the 1353 * vhost-user backend doesn't know the exact number of queues used for this 1354 * device. The target have to stop and start the device once got a valid 1355 * IO queue. 1356 * When stoping and starting the vhost device, the backend bdev io device 1357 * will be deleted and created repeatedly. 1358 * Hold a bdev reference so that in the struct spdk_vhost_blk_dev, so that 1359 * the io device will not be deleted. 1360 */ 1361 bvdev->dummy_io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 1362 1363 bvdev->bdev = bdev; 1364 bvdev->readonly = readonly; 1365 ret = vhost_dev_register(vdev, name, cpumask, &vhost_blk_device_backend); 1366 if (ret != 0) { 1367 spdk_put_io_channel(bvdev->dummy_io_channel); 1368 spdk_bdev_close(bvdev->bdev_desc); 1369 goto out; 1370 } 1371 1372 SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name); 1373 out: 1374 if (ret != 0 && bvdev) { 1375 free(bvdev); 1376 } 1377 spdk_vhost_unlock(); 1378 return ret; 1379 } 1380 1381 static int 1382 vhost_blk_destroy(struct spdk_vhost_dev *vdev) 1383 { 1384 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 1385 int rc; 1386 1387 assert(bvdev != NULL); 1388 1389 rc = vhost_dev_unregister(&bvdev->vdev); 1390 if (rc != 0) { 1391 return rc; 1392 } 1393 1394 /* if the bdev is removed, don't need call spdk_put_io_channel. */ 1395 if (bvdev->bdev) { 1396 spdk_put_io_channel(bvdev->dummy_io_channel); 1397 } 1398 1399 if (bvdev->bdev_desc) { 1400 spdk_bdev_close(bvdev->bdev_desc); 1401 bvdev->bdev_desc = NULL; 1402 } 1403 bvdev->bdev = NULL; 1404 1405 free(bvdev); 1406 return 0; 1407 } 1408 1409 SPDK_LOG_REGISTER_COMPONENT(vhost_blk) 1410 SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data) 1411