1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/bdev_module.h" 39 #include "spdk/conf.h" 40 #include "spdk/thread.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/util.h" 44 #include "spdk/vhost.h" 45 46 #include "vhost_internal.h" 47 #include <rte_version.h> 48 49 /* Minimal set of features supported by every SPDK VHOST-BLK device */ 50 #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \ 51 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ 52 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ 53 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \ 54 (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 55 (1ULL << VIRTIO_BLK_F_MQ)) 56 57 /* Not supported features */ 58 #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \ 59 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 60 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI)) 61 62 /* Vhost-blk support protocol features */ 63 #ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB 64 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \ 65 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) 66 #else 67 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES (1ULL << VHOST_USER_PROTOCOL_F_CONFIG) 68 #endif 69 70 struct spdk_vhost_blk_task { 71 struct spdk_bdev_io *bdev_io; 72 struct spdk_vhost_blk_session *bvsession; 73 struct spdk_vhost_virtqueue *vq; 74 75 volatile uint8_t *status; 76 77 uint16_t req_idx; 78 uint16_t num_descs; 79 uint16_t buffer_id; 80 81 /* for io wait */ 82 struct spdk_bdev_io_wait_entry bdev_io_wait; 83 84 /* If set, the task is currently used for I/O processing. */ 85 bool used; 86 87 /** Number of bytes that were written. */ 88 uint32_t used_len; 89 uint16_t iovcnt; 90 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 91 }; 92 93 struct spdk_vhost_blk_dev { 94 struct spdk_vhost_dev vdev; 95 struct spdk_bdev *bdev; 96 struct spdk_bdev_desc *bdev_desc; 97 /* dummy_io_channel is used to hold a bdev reference */ 98 struct spdk_io_channel *dummy_io_channel; 99 bool readonly; 100 }; 101 102 struct spdk_vhost_blk_session { 103 /* The parent session must be the very first field in this struct */ 104 struct spdk_vhost_session vsession; 105 struct spdk_vhost_blk_dev *bvdev; 106 struct spdk_poller *requestq_poller; 107 struct spdk_io_channel *io_channel; 108 struct spdk_poller *stop_poller; 109 }; 110 111 /* forward declaration */ 112 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 113 114 static int 115 process_blk_request(struct spdk_vhost_blk_task *task, 116 struct spdk_vhost_blk_session *bvsession, 117 struct spdk_vhost_virtqueue *vq); 118 119 static void 120 blk_task_finish(struct spdk_vhost_blk_task *task) 121 { 122 assert(task->bvsession->vsession.task_cnt > 0); 123 task->bvsession->vsession.task_cnt--; 124 task->used = false; 125 } 126 127 static void 128 blk_task_init(struct spdk_vhost_blk_task *task) 129 { 130 task->used = true; 131 task->iovcnt = SPDK_COUNTOF(task->iovs); 132 task->status = NULL; 133 task->used_len = 0; 134 } 135 136 static void 137 blk_task_enqueue(struct spdk_vhost_blk_task *task) 138 { 139 if (task->vq->packed.packed_ring) { 140 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq, 141 task->num_descs, 142 task->buffer_id, task->used_len); 143 } else { 144 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, 145 task->req_idx, task->used_len); 146 } 147 } 148 149 static void 150 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 151 { 152 if (task->status) { 153 *task->status = status; 154 } 155 156 blk_task_enqueue(task); 157 blk_task_finish(task); 158 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 159 } 160 161 /* 162 * Process task's descriptor chain and setup data related fields. 163 * Return 164 * total size of suplied buffers 165 * 166 * FIXME: Make this function return to rd_cnt and wr_cnt 167 */ 168 static int 169 blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession, 170 struct spdk_vhost_virtqueue *vq, 171 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 172 { 173 struct spdk_vhost_session *vsession = &bvsession->vsession; 174 struct spdk_vhost_dev *vdev = vsession->vdev; 175 struct vring_desc *desc, *desc_table; 176 uint16_t out_cnt = 0, cnt = 0; 177 uint32_t desc_table_size, len = 0; 178 uint32_t desc_handled_cnt; 179 int rc; 180 181 rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size); 182 if (rc != 0) { 183 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 184 return -1; 185 } 186 187 desc_handled_cnt = 0; 188 while (1) { 189 /* 190 * Maximum cnt reached? 191 * Should not happen if request is well formatted, otherwise this is a BUG. 192 */ 193 if (spdk_unlikely(cnt == *iovs_cnt)) { 194 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 195 vsession->name, req_idx); 196 return -1; 197 } 198 199 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) { 200 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 201 vsession->name, req_idx, cnt); 202 return -1; 203 } 204 205 len += desc->len; 206 207 out_cnt += vhost_vring_desc_is_wr(desc); 208 209 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 210 if (rc != 0) { 211 SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 212 vsession->name, req_idx); 213 return -1; 214 } else if (desc == NULL) { 215 break; 216 } 217 218 desc_handled_cnt++; 219 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { 220 /* Break a cycle and report an error, if any. */ 221 SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n", 222 vsession->name, desc_table_size, desc_handled_cnt); 223 return -1; 224 } 225 } 226 227 /* 228 * There must be least two descriptors. 229 * First contain request so it must be readable. 230 * Last descriptor contain buffer for response so it must be writable. 231 */ 232 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 233 return -1; 234 } 235 236 *length = len; 237 *iovs_cnt = cnt; 238 return 0; 239 } 240 241 static int 242 blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession, 243 struct spdk_vhost_virtqueue *vq, 244 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 245 { 246 struct spdk_vhost_session *vsession = &bvsession->vsession; 247 struct spdk_vhost_dev *vdev = vsession->vdev; 248 struct vring_packed_desc *desc = NULL, *desc_table; 249 uint16_t out_cnt = 0, cnt = 0; 250 uint32_t desc_table_size, len = 0; 251 int rc = 0; 252 253 rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc, 254 &desc_table, &desc_table_size); 255 if (spdk_unlikely(rc != 0)) { 256 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 257 return rc; 258 } 259 260 if (desc_table != NULL) { 261 req_idx = 0; 262 } 263 264 while (1) { 265 /* 266 * Maximum cnt reached? 267 * Should not happen if request is well formatted, otherwise this is a BUG. 268 */ 269 if (spdk_unlikely(cnt == *iovs_cnt)) { 270 SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 271 vsession->name, req_idx); 272 return -EINVAL; 273 } 274 275 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) { 276 SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 277 vsession->name, req_idx, cnt); 278 return -EINVAL; 279 } 280 281 len += desc->len; 282 out_cnt += vhost_vring_packed_desc_is_wr(desc); 283 284 /* desc is NULL means we reach the last desc of this request */ 285 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size); 286 if (desc == NULL) { 287 break; 288 } 289 } 290 291 /* 292 * There must be least two descriptors. 293 * First contain request so it must be readable. 294 * Last descriptor contain buffer for response so it must be writable. 295 */ 296 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 297 return -EINVAL; 298 } 299 300 *length = len; 301 *iovs_cnt = cnt; 302 303 return 0; 304 } 305 306 static void 307 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 308 { 309 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 310 311 blk_task_enqueue(task); 312 313 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 314 task->req_idx, success ? "OK" : "FAIL"); 315 blk_task_finish(task); 316 } 317 318 static void 319 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 320 { 321 struct spdk_vhost_blk_task *task = cb_arg; 322 323 spdk_bdev_free_io(bdev_io); 324 blk_request_finish(success, task); 325 } 326 327 static void 328 blk_request_resubmit(void *arg) 329 { 330 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 331 int rc = 0; 332 333 blk_task_init(task); 334 335 rc = process_blk_request(task, task->bvsession, task->vq); 336 if (rc == 0) { 337 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 338 } else { 339 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 340 } 341 } 342 343 static inline void 344 blk_request_queue_io(struct spdk_vhost_blk_task *task) 345 { 346 int rc; 347 struct spdk_vhost_blk_session *bvsession = task->bvsession; 348 struct spdk_bdev *bdev = bvsession->bvdev->bdev; 349 350 task->bdev_io_wait.bdev = bdev; 351 task->bdev_io_wait.cb_fn = blk_request_resubmit; 352 task->bdev_io_wait.cb_arg = task; 353 354 rc = spdk_bdev_queue_io_wait(bdev, bvsession->io_channel, &task->bdev_io_wait); 355 if (rc != 0) { 356 SPDK_ERRLOG("%s: failed to queue I/O, rc=%d\n", bvsession->vsession.name, rc); 357 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 358 } 359 } 360 361 static int 362 process_blk_request(struct spdk_vhost_blk_task *task, 363 struct spdk_vhost_blk_session *bvsession, 364 struct spdk_vhost_virtqueue *vq) 365 { 366 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 367 const struct virtio_blk_outhdr *req; 368 struct virtio_blk_discard_write_zeroes *desc; 369 struct iovec *iov; 370 uint32_t type; 371 uint32_t payload_len; 372 uint64_t flush_bytes; 373 int rc; 374 375 if (vq->packed.packed_ring) { 376 rc = blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 377 &payload_len); 378 } else { 379 rc = blk_iovs_split_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 380 &payload_len); 381 } 382 383 if (rc) { 384 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 385 /* Only READ and WRITE are supported for now. */ 386 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 387 return -1; 388 } 389 390 iov = &task->iovs[0]; 391 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 392 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 393 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 394 iov->iov_len, sizeof(*req), task->req_idx); 395 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 396 return -1; 397 } 398 399 req = iov->iov_base; 400 401 iov = &task->iovs[task->iovcnt - 1]; 402 if (spdk_unlikely(iov->iov_len != 1)) { 403 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 404 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 405 iov->iov_len, 1, task->req_idx); 406 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 407 return -1; 408 } 409 410 task->status = iov->iov_base; 411 payload_len -= sizeof(*req) + sizeof(*task->status); 412 task->iovcnt -= 2; 413 414 type = req->type; 415 #ifdef VIRTIO_BLK_T_BARRIER 416 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 417 type &= ~VIRTIO_BLK_T_BARRIER; 418 #endif 419 420 switch (type) { 421 case VIRTIO_BLK_T_IN: 422 case VIRTIO_BLK_T_OUT: 423 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { 424 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 425 type ? "WRITE" : "READ", task->req_idx); 426 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 427 return -1; 428 } 429 430 if (type == VIRTIO_BLK_T_IN) { 431 task->used_len = payload_len + sizeof(*task->status); 432 rc = spdk_bdev_readv(bvdev->bdev_desc, bvsession->io_channel, 433 &task->iovs[1], task->iovcnt, req->sector * 512, 434 payload_len, blk_request_complete_cb, task); 435 } else if (!bvdev->readonly) { 436 task->used_len = sizeof(*task->status); 437 rc = spdk_bdev_writev(bvdev->bdev_desc, bvsession->io_channel, 438 &task->iovs[1], task->iovcnt, req->sector * 512, 439 payload_len, blk_request_complete_cb, task); 440 } else { 441 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 442 rc = -1; 443 } 444 445 if (rc) { 446 if (rc == -ENOMEM) { 447 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 448 blk_request_queue_io(task); 449 } else { 450 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 451 return -1; 452 } 453 } 454 break; 455 case VIRTIO_BLK_T_DISCARD: 456 desc = task->iovs[1].iov_base; 457 if (payload_len != sizeof(*desc)) { 458 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 459 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 460 return -1; 461 } 462 463 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvsession->io_channel, 464 desc->sector * 512, desc->num_sectors * 512, 465 blk_request_complete_cb, task); 466 if (rc) { 467 if (rc == -ENOMEM) { 468 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 469 blk_request_queue_io(task); 470 } else { 471 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 472 return -1; 473 } 474 } 475 break; 476 case VIRTIO_BLK_T_WRITE_ZEROES: 477 desc = task->iovs[1].iov_base; 478 if (payload_len != sizeof(*desc)) { 479 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 480 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 481 return -1; 482 } 483 484 /* Zeroed and Unmap the range, SPDK doen't support it. */ 485 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 486 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 487 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 488 return -1; 489 } 490 491 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvsession->io_channel, 492 desc->sector * 512, desc->num_sectors * 512, 493 blk_request_complete_cb, task); 494 if (rc) { 495 if (rc == -ENOMEM) { 496 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 497 blk_request_queue_io(task); 498 } else { 499 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 500 return -1; 501 } 502 } 503 break; 504 case VIRTIO_BLK_T_FLUSH: 505 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev); 506 if (req->sector != 0) { 507 SPDK_NOTICELOG("sector must be zero for flush command\n"); 508 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 509 return -1; 510 } 511 rc = spdk_bdev_flush(bvdev->bdev_desc, bvsession->io_channel, 512 0, flush_bytes, 513 blk_request_complete_cb, task); 514 if (rc) { 515 if (rc == -ENOMEM) { 516 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 517 blk_request_queue_io(task); 518 } else { 519 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 520 return -1; 521 } 522 } 523 break; 524 case VIRTIO_BLK_T_GET_ID: 525 if (!task->iovcnt || !payload_len) { 526 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 527 return -1; 528 } 529 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 530 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 531 task->used_len, ' '); 532 blk_request_finish(true, task); 533 break; 534 default: 535 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 536 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 537 return -1; 538 } 539 540 return 0; 541 } 542 543 static void 544 process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx) 545 { 546 struct spdk_vhost_blk_task *task; 547 uint16_t task_idx = req_idx, num_descs; 548 549 if (vq->packed.packed_ring) { 550 /* Packed ring used the buffer_id as the task_idx to get task struct. 551 * In kernel driver, it uses the vq->free_head to set the buffer_id so the value 552 * must be in the range of 0 ~ vring.size. The free_head value must be unique 553 * in the outstanding requests. 554 * We can't use the req_idx as the task_idx because the desc can be reused in 555 * the next phase even when it's not completed in the previous phase. For example, 556 * At phase 0, last_used_idx was 2 and desc0 was not completed.Then after moving 557 * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used 558 * as task_idx because we will know task[0]->used is true at phase 1. 559 * The split queue is quite different, the desc would insert into the free list when 560 * device completes the request, the driver gets the desc from the free list which 561 * ensures the req_idx is unique in the outstanding requests. 562 */ 563 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 564 } 565 566 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 567 if (spdk_unlikely(task->used)) { 568 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 569 task->bvsession->vsession.name, task_idx); 570 task->used_len = 0; 571 blk_task_enqueue(task); 572 return; 573 } 574 575 if (vq->packed.packed_ring) { 576 task->req_idx = req_idx; 577 task->num_descs = num_descs; 578 task->buffer_id = task_idx; 579 } 580 581 task->bvsession->vsession.task_cnt++; 582 583 blk_task_init(task); 584 585 if (process_blk_request(task, task->bvsession, vq) == 0) { 586 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 587 task_idx); 588 } else { 589 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx); 590 } 591 } 592 593 static void 594 submit_inflight_desc(struct spdk_vhost_blk_session *bvsession, 595 struct spdk_vhost_virtqueue *vq) 596 { 597 struct spdk_vhost_session *vsession = &bvsession->vsession; 598 spdk_vhost_resubmit_info *resubmit = vq->vring_inflight.resubmit_inflight; 599 spdk_vhost_resubmit_desc *resubmit_list; 600 uint16_t req_idx; 601 602 if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL)) { 603 return; 604 } 605 606 resubmit_list = resubmit->resubmit_list; 607 while (resubmit->resubmit_num-- > 0) { 608 req_idx = resubmit_list[resubmit->resubmit_num].index; 609 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Start processing request idx %"PRIu16"======\n", 610 req_idx); 611 612 if (spdk_unlikely(req_idx >= vq->vring.size)) { 613 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 614 vsession->name, req_idx, vq->vring.size); 615 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 616 continue; 617 } 618 619 process_blk_task(vq, req_idx); 620 } 621 622 free(resubmit_list); 623 resubmit->resubmit_list = NULL; 624 } 625 626 static void 627 process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 628 { 629 struct spdk_vhost_session *vsession = &bvsession->vsession; 630 uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS]; 631 uint16_t reqs_cnt, i; 632 633 submit_inflight_desc(bvsession, vq); 634 635 reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 636 if (!reqs_cnt) { 637 return; 638 } 639 640 for (i = 0; i < reqs_cnt; i++) { 641 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 642 reqs[i]); 643 644 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 645 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 646 vsession->name, reqs[i], vq->vring.size); 647 vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 648 continue; 649 } 650 651 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]); 652 653 process_blk_task(vq, reqs[i]); 654 } 655 } 656 657 static void 658 process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 659 { 660 uint16_t i = 0; 661 662 while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS && 663 vhost_vq_packed_ring_is_avail(vq)) { 664 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 665 vq->last_avail_idx); 666 667 process_blk_task(vq, vq->last_avail_idx); 668 } 669 } 670 671 static int 672 vdev_worker(void *arg) 673 { 674 struct spdk_vhost_blk_session *bvsession = arg; 675 struct spdk_vhost_session *vsession = &bvsession->vsession; 676 677 uint16_t q_idx; 678 bool packed_ring; 679 680 /* In a session, every vq supports the same format */ 681 packed_ring = vsession->virtqueue[0].packed.packed_ring; 682 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 683 if (packed_ring) { 684 process_packed_vq(bvsession, &vsession->virtqueue[q_idx]); 685 } else { 686 process_vq(bvsession, &vsession->virtqueue[q_idx]); 687 } 688 } 689 690 vhost_session_used_signal(vsession); 691 692 return SPDK_POLLER_BUSY; 693 } 694 695 static void 696 no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 697 { 698 struct spdk_vhost_session *vsession = &bvsession->vsession; 699 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 700 uint32_t length; 701 uint16_t iovcnt, req_idx; 702 703 if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 704 return; 705 } 706 707 iovcnt = SPDK_COUNTOF(iovs); 708 if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) { 709 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 710 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 711 } 712 713 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 714 } 715 716 static void 717 no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 718 { 719 struct spdk_vhost_session *vsession = &bvsession->vsession; 720 struct spdk_vhost_blk_task *task; 721 uint32_t length; 722 uint16_t req_idx = vq->last_avail_idx; 723 uint16_t task_idx, num_descs; 724 725 if (!vhost_vq_packed_ring_is_avail(vq)) { 726 return; 727 } 728 729 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 730 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 731 if (spdk_unlikely(task->used)) { 732 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 733 vsession->name, req_idx); 734 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 735 task->buffer_id, task->used_len); 736 return; 737 } 738 739 task->req_idx = req_idx; 740 task->num_descs = num_descs; 741 task->buffer_id = task_idx; 742 blk_task_init(task); 743 744 if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 745 &length)) { 746 *(volatile uint8_t *)(task->iovs[task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR; 747 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 748 } 749 750 task->used = false; 751 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 752 task->buffer_id, task->used_len); 753 } 754 755 static int 756 no_bdev_vdev_worker(void *arg) 757 { 758 struct spdk_vhost_blk_session *bvsession = arg; 759 struct spdk_vhost_session *vsession = &bvsession->vsession; 760 uint16_t q_idx; 761 bool packed_ring; 762 763 /* In a session, every vq supports the same format */ 764 packed_ring = vsession->virtqueue[0].packed.packed_ring; 765 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 766 if (packed_ring) { 767 no_bdev_process_packed_vq(bvsession, &vsession->virtqueue[q_idx]); 768 } else { 769 no_bdev_process_vq(bvsession, &vsession->virtqueue[q_idx]); 770 } 771 } 772 773 vhost_session_used_signal(vsession); 774 775 if (vsession->task_cnt == 0 && bvsession->io_channel) { 776 spdk_put_io_channel(bvsession->io_channel); 777 bvsession->io_channel = NULL; 778 } 779 780 return SPDK_POLLER_BUSY; 781 } 782 783 static struct spdk_vhost_blk_session * 784 to_blk_session(struct spdk_vhost_session *vsession) 785 { 786 assert(vsession->vdev->backend == &vhost_blk_device_backend); 787 return (struct spdk_vhost_blk_session *)vsession; 788 } 789 790 static struct spdk_vhost_blk_dev * 791 to_blk_dev(struct spdk_vhost_dev *vdev) 792 { 793 if (vdev == NULL) { 794 return NULL; 795 } 796 797 if (vdev->backend != &vhost_blk_device_backend) { 798 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 799 return NULL; 800 } 801 802 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 803 } 804 805 static int 806 vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev, 807 struct spdk_vhost_session *vsession, 808 void *ctx) 809 { 810 #if RTE_VERSION >= RTE_VERSION_NUM(20, 02, 0, 0) 811 SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid); 812 rte_vhost_slave_config_change(vsession->vid, false); 813 #else 814 SPDK_NOTICELOG("bdev does not support resize until DPDK submodule version >= 20.02\n"); 815 #endif 816 817 return 0; 818 } 819 820 static void 821 blk_resize_cb(void *resize_ctx) 822 { 823 struct spdk_vhost_blk_dev *bvdev = resize_ctx; 824 825 spdk_vhost_lock(); 826 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_resize_cb, 827 NULL, NULL); 828 spdk_vhost_unlock(); 829 } 830 831 static void 832 vhost_dev_bdev_remove_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx) 833 { 834 835 /* All sessions have been notified, time to close the bdev */ 836 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 837 838 assert(bvdev != NULL); 839 spdk_put_io_channel(bvdev->dummy_io_channel); 840 spdk_bdev_close(bvdev->bdev_desc); 841 bvdev->bdev_desc = NULL; 842 bvdev->bdev = NULL; 843 } 844 845 static int 846 vhost_session_bdev_remove_cb(struct spdk_vhost_dev *vdev, 847 struct spdk_vhost_session *vsession, 848 void *ctx) 849 { 850 struct spdk_vhost_blk_session *bvsession; 851 852 bvsession = (struct spdk_vhost_blk_session *)vsession; 853 if (bvsession->requestq_poller) { 854 spdk_poller_unregister(&bvsession->requestq_poller); 855 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0); 856 } 857 858 return 0; 859 } 860 861 static void 862 bdev_remove_cb(void *remove_ctx) 863 { 864 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 865 866 SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n", 867 bvdev->vdev.name); 868 869 spdk_vhost_lock(); 870 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_remove_cb, 871 vhost_dev_bdev_remove_cpl_cb, NULL); 872 spdk_vhost_unlock(); 873 } 874 875 static void 876 bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 877 void *event_ctx) 878 { 879 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Bdev event: type %d, name %s\n", 880 type, 881 bdev->name); 882 883 switch (type) { 884 case SPDK_BDEV_EVENT_REMOVE: 885 SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_REMOVE)\n", bdev->name); 886 bdev_remove_cb(event_ctx); 887 break; 888 case SPDK_BDEV_EVENT_RESIZE: 889 SPDK_NOTICELOG("bdev name (%s) received event(SPDK_BDEV_EVENT_RESIZE)\n", bdev->name); 890 blk_resize_cb(event_ctx); 891 break; 892 default: 893 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 894 break; 895 } 896 } 897 898 static void 899 free_task_pool(struct spdk_vhost_blk_session *bvsession) 900 { 901 struct spdk_vhost_session *vsession = &bvsession->vsession; 902 struct spdk_vhost_virtqueue *vq; 903 uint16_t i; 904 905 for (i = 0; i < vsession->max_queues; i++) { 906 vq = &vsession->virtqueue[i]; 907 if (vq->tasks == NULL) { 908 continue; 909 } 910 911 spdk_free(vq->tasks); 912 vq->tasks = NULL; 913 } 914 } 915 916 static int 917 alloc_task_pool(struct spdk_vhost_blk_session *bvsession) 918 { 919 struct spdk_vhost_session *vsession = &bvsession->vsession; 920 struct spdk_vhost_virtqueue *vq; 921 struct spdk_vhost_blk_task *task; 922 uint32_t task_cnt; 923 uint16_t i; 924 uint32_t j; 925 926 for (i = 0; i < vsession->max_queues; i++) { 927 vq = &vsession->virtqueue[i]; 928 if (vq->vring.desc == NULL) { 929 continue; 930 } 931 932 task_cnt = vq->vring.size; 933 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 934 /* sanity check */ 935 SPDK_ERRLOG("%s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 936 vsession->name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 937 free_task_pool(bvsession); 938 return -1; 939 } 940 vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 941 SPDK_CACHE_LINE_SIZE, NULL, 942 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 943 if (vq->tasks == NULL) { 944 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 945 vsession->name, task_cnt, i); 946 free_task_pool(bvsession); 947 return -1; 948 } 949 950 for (j = 0; j < task_cnt; j++) { 951 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 952 task->bvsession = bvsession; 953 task->req_idx = j; 954 task->vq = vq; 955 } 956 } 957 958 return 0; 959 } 960 961 static int 962 vhost_blk_start_cb(struct spdk_vhost_dev *vdev, 963 struct spdk_vhost_session *vsession, void *unused) 964 { 965 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 966 struct spdk_vhost_blk_dev *bvdev; 967 int i, rc = 0; 968 969 bvdev = to_blk_dev(vdev); 970 assert(bvdev != NULL); 971 bvsession->bvdev = bvdev; 972 973 /* validate all I/O queues are in a contiguous index range */ 974 for (i = 0; i < vsession->max_queues; i++) { 975 /* vring.desc and vring.desc_packed are in a union struct 976 * so q->vring.desc can replace q->vring.desc_packed. 977 */ 978 if (vsession->virtqueue[i].vring.desc == NULL) { 979 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i); 980 rc = -1; 981 goto out; 982 } 983 } 984 985 rc = alloc_task_pool(bvsession); 986 if (rc != 0) { 987 SPDK_ERRLOG("%s: failed to alloc task pool.\n", vsession->name); 988 goto out; 989 } 990 991 if (bvdev->bdev) { 992 bvsession->io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 993 if (!bvsession->io_channel) { 994 free_task_pool(bvsession); 995 SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name); 996 rc = -1; 997 goto out; 998 } 999 } 1000 1001 bvsession->requestq_poller = SPDK_POLLER_REGISTER(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 1002 bvsession, 0); 1003 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: started poller on lcore %d\n", 1004 vsession->name, spdk_env_get_current_core()); 1005 out: 1006 vhost_session_start_done(vsession, rc); 1007 return rc; 1008 } 1009 1010 static int 1011 vhost_blk_start(struct spdk_vhost_session *vsession) 1012 { 1013 return vhost_session_send_event(vsession, vhost_blk_start_cb, 1014 3, "start session"); 1015 } 1016 1017 static int 1018 destroy_session_poller_cb(void *arg) 1019 { 1020 struct spdk_vhost_blk_session *bvsession = arg; 1021 struct spdk_vhost_session *vsession = &bvsession->vsession; 1022 int i; 1023 1024 if (vsession->task_cnt > 0) { 1025 return SPDK_POLLER_BUSY; 1026 } 1027 1028 if (spdk_vhost_trylock() != 0) { 1029 return SPDK_POLLER_BUSY; 1030 } 1031 1032 for (i = 0; i < vsession->max_queues; i++) { 1033 vsession->virtqueue[i].next_event_time = 0; 1034 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 1035 } 1036 1037 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: stopping poller on lcore %d\n", 1038 vsession->name, spdk_env_get_current_core()); 1039 1040 if (bvsession->io_channel) { 1041 spdk_put_io_channel(bvsession->io_channel); 1042 bvsession->io_channel = NULL; 1043 } 1044 1045 free_task_pool(bvsession); 1046 spdk_poller_unregister(&bvsession->stop_poller); 1047 vhost_session_stop_done(vsession, 0); 1048 1049 spdk_vhost_unlock(); 1050 return SPDK_POLLER_BUSY; 1051 } 1052 1053 static int 1054 vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, 1055 struct spdk_vhost_session *vsession, void *unused) 1056 { 1057 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 1058 1059 spdk_poller_unregister(&bvsession->requestq_poller); 1060 bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb, 1061 bvsession, 1000); 1062 return 0; 1063 } 1064 1065 static int 1066 vhost_blk_stop(struct spdk_vhost_session *vsession) 1067 { 1068 return vhost_session_send_event(vsession, vhost_blk_stop_cb, 1069 3, "stop session"); 1070 } 1071 1072 static void 1073 vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1074 { 1075 struct spdk_vhost_blk_dev *bvdev; 1076 1077 bvdev = to_blk_dev(vdev); 1078 assert(bvdev != NULL); 1079 1080 spdk_json_write_named_object_begin(w, "block"); 1081 1082 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1083 1084 spdk_json_write_name(w, "bdev"); 1085 if (bvdev->bdev) { 1086 spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev)); 1087 } else { 1088 spdk_json_write_null(w); 1089 } 1090 1091 spdk_json_write_object_end(w); 1092 } 1093 1094 static void 1095 vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1096 { 1097 struct spdk_vhost_blk_dev *bvdev; 1098 1099 bvdev = to_blk_dev(vdev); 1100 assert(bvdev != NULL); 1101 1102 if (!bvdev->bdev) { 1103 return; 1104 } 1105 1106 spdk_json_write_object_begin(w); 1107 spdk_json_write_named_string(w, "method", "vhost_create_blk_controller"); 1108 1109 spdk_json_write_named_object_begin(w, "params"); 1110 spdk_json_write_named_string(w, "ctrlr", vdev->name); 1111 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 1112 spdk_json_write_named_string(w, "cpumask", 1113 spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread))); 1114 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1115 spdk_json_write_object_end(w); 1116 1117 spdk_json_write_object_end(w); 1118 } 1119 1120 static int vhost_blk_destroy(struct spdk_vhost_dev *dev); 1121 1122 static int 1123 vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 1124 uint32_t len) 1125 { 1126 struct virtio_blk_config blkcfg; 1127 struct spdk_vhost_blk_dev *bvdev; 1128 struct spdk_bdev *bdev; 1129 uint32_t blk_size; 1130 uint64_t blkcnt; 1131 1132 memset(&blkcfg, 0, sizeof(blkcfg)); 1133 bvdev = to_blk_dev(vdev); 1134 assert(bvdev != NULL); 1135 bdev = bvdev->bdev; 1136 if (bdev == NULL) { 1137 /* We can't just return -1 here as this GET_CONFIG message might 1138 * be caused by a QEMU VM reboot. Returning -1 will indicate an 1139 * error to QEMU, who might then decide to terminate itself. 1140 * We don't want that. A simple reboot shouldn't break the system. 1141 * 1142 * Presenting a block device with block size 0 and block count 0 1143 * doesn't cause any problems on QEMU side and the virtio-pci 1144 * device is even still available inside the VM, but there will 1145 * be no block device created for it - the kernel drivers will 1146 * silently reject it. 1147 */ 1148 blk_size = 0; 1149 blkcnt = 0; 1150 } else { 1151 blk_size = spdk_bdev_get_block_size(bdev); 1152 blkcnt = spdk_bdev_get_num_blocks(bdev); 1153 if (spdk_bdev_get_buf_align(bdev) > 1) { 1154 blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE; 1155 blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, BDEV_IO_NUM_CHILD_IOV - 2 - 1); 1156 } else { 1157 blkcfg.size_max = 131072; 1158 /* -2 for REQ and RESP and -1 for region boundary splitting */ 1159 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 1160 } 1161 } 1162 1163 blkcfg.blk_size = blk_size; 1164 /* minimum I/O size in blocks */ 1165 blkcfg.min_io_size = 1; 1166 /* expressed in 512 Bytes sectors */ 1167 blkcfg.capacity = (blkcnt * blk_size) / 512; 1168 /* QEMU can overwrite this value when started */ 1169 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 1170 1171 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1172 /* 16MiB, expressed in 512 Bytes */ 1173 blkcfg.max_discard_sectors = 32768; 1174 blkcfg.max_discard_seg = 1; 1175 blkcfg.discard_sector_alignment = blk_size / 512; 1176 } 1177 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1178 blkcfg.max_write_zeroes_sectors = 32768; 1179 blkcfg.max_write_zeroes_seg = 1; 1180 } 1181 1182 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 1183 1184 return 0; 1185 } 1186 1187 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 1188 .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session), 1189 .start_session = vhost_blk_start, 1190 .stop_session = vhost_blk_stop, 1191 .vhost_get_config = vhost_blk_get_config, 1192 .dump_info_json = vhost_blk_dump_info_json, 1193 .write_config_json = vhost_blk_write_config_json, 1194 .remove_device = vhost_blk_destroy, 1195 }; 1196 1197 int 1198 vhost_blk_controller_construct(void) 1199 { 1200 struct spdk_conf_section *sp; 1201 unsigned ctrlr_num; 1202 char *bdev_name; 1203 char *cpumask; 1204 char *name; 1205 bool readonly; 1206 bool packed_ring; 1207 1208 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 1209 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 1210 continue; 1211 } 1212 1213 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 1214 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 1215 spdk_conf_section_get_name(sp)); 1216 return -1; 1217 } 1218 1219 name = spdk_conf_section_get_val(sp, "Name"); 1220 if (name == NULL) { 1221 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 1222 return -1; 1223 } 1224 1225 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 1226 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 1227 packed_ring = spdk_conf_section_get_boolval(sp, "PackedRing", false); 1228 1229 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 1230 if (bdev_name == NULL) { 1231 continue; 1232 } 1233 1234 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, 1235 readonly, packed_ring) < 0) { 1236 return -1; 1237 } 1238 } 1239 1240 return 0; 1241 } 1242 1243 int 1244 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, 1245 bool readonly, bool packed_ring) 1246 { 1247 struct spdk_vhost_blk_dev *bvdev = NULL; 1248 struct spdk_vhost_dev *vdev; 1249 struct spdk_bdev *bdev; 1250 int ret = 0; 1251 1252 spdk_vhost_lock(); 1253 bdev = spdk_bdev_get_by_name(dev_name); 1254 if (bdev == NULL) { 1255 SPDK_ERRLOG("%s: bdev '%s' not found\n", 1256 name, dev_name); 1257 ret = -ENODEV; 1258 goto out; 1259 } 1260 1261 bvdev = calloc(1, sizeof(*bvdev)); 1262 if (bvdev == NULL) { 1263 ret = -ENOMEM; 1264 goto out; 1265 } 1266 1267 vdev = &bvdev->vdev; 1268 vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE; 1269 vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES; 1270 vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES; 1271 1272 vdev->virtio_features |= (uint64_t)packed_ring << VIRTIO_F_RING_PACKED; 1273 1274 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1275 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD); 1276 } 1277 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1278 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 1279 } 1280 if (readonly) { 1281 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO); 1282 } 1283 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { 1284 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH); 1285 } 1286 1287 ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc); 1288 if (ret != 0) { 1289 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n", 1290 name, dev_name, ret); 1291 goto out; 1292 } 1293 1294 /* 1295 * When starting qemu with vhost-user-blk multiqueue, the vhost device will 1296 * be started/stopped many times, related to the queues num, as the 1297 * vhost-user backend doesn't know the exact number of queues used for this 1298 * device. The target have to stop and start the device once got a valid 1299 * IO queue. 1300 * When stoping and starting the vhost device, the backend bdev io device 1301 * will be deleted and created repeatedly. 1302 * Hold a bdev reference so that in the struct spdk_vhost_blk_dev, so that 1303 * the io device will not be deleted. 1304 */ 1305 bvdev->dummy_io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 1306 1307 bvdev->bdev = bdev; 1308 bvdev->readonly = readonly; 1309 ret = vhost_dev_register(vdev, name, cpumask, &vhost_blk_device_backend); 1310 if (ret != 0) { 1311 spdk_put_io_channel(bvdev->dummy_io_channel); 1312 spdk_bdev_close(bvdev->bdev_desc); 1313 goto out; 1314 } 1315 1316 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: using bdev '%s'\n", name, dev_name); 1317 out: 1318 if (ret != 0 && bvdev) { 1319 free(bvdev); 1320 } 1321 spdk_vhost_unlock(); 1322 return ret; 1323 } 1324 1325 static int 1326 vhost_blk_destroy(struct spdk_vhost_dev *vdev) 1327 { 1328 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 1329 int rc; 1330 1331 assert(bvdev != NULL); 1332 1333 rc = vhost_dev_unregister(&bvdev->vdev); 1334 if (rc != 0) { 1335 return rc; 1336 } 1337 1338 /* if the bdev is removed, don't need call spdk_put_io_channel. */ 1339 if (bvdev->bdev) { 1340 spdk_put_io_channel(bvdev->dummy_io_channel); 1341 } 1342 1343 if (bvdev->bdev_desc) { 1344 spdk_bdev_close(bvdev->bdev_desc); 1345 bvdev->bdev_desc = NULL; 1346 } 1347 bvdev->bdev = NULL; 1348 1349 free(bvdev); 1350 return 0; 1351 } 1352 1353 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 1354 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 1355