1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/bdev_module.h" 39 #include "spdk/conf.h" 40 #include "spdk/thread.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/util.h" 44 #include "spdk/vhost.h" 45 46 #include "vhost_internal.h" 47 48 /* Minimal set of features supported by every SPDK VHOST-BLK device */ 49 #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \ 50 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ 51 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ 52 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \ 53 (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 54 (1ULL << VIRTIO_BLK_F_MQ)) 55 56 /* Not supported features */ 57 #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \ 58 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 59 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI)) 60 61 /* Vhost-blk support protocol features */ 62 #ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB 63 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \ 64 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) 65 #else 66 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES (1ULL << VHOST_USER_PROTOCOL_F_CONFIG) 67 #endif 68 69 struct spdk_vhost_blk_task { 70 struct spdk_bdev_io *bdev_io; 71 struct spdk_vhost_blk_session *bvsession; 72 struct spdk_vhost_virtqueue *vq; 73 74 volatile uint8_t *status; 75 76 uint16_t req_idx; 77 uint16_t num_descs; 78 uint16_t buffer_id; 79 80 /* for io wait */ 81 struct spdk_bdev_io_wait_entry bdev_io_wait; 82 83 /* If set, the task is currently used for I/O processing. */ 84 bool used; 85 86 /** Number of bytes that were written. */ 87 uint32_t used_len; 88 uint16_t iovcnt; 89 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 90 }; 91 92 struct spdk_vhost_blk_dev { 93 struct spdk_vhost_dev vdev; 94 struct spdk_bdev *bdev; 95 struct spdk_bdev_desc *bdev_desc; 96 bool readonly; 97 }; 98 99 struct spdk_vhost_blk_session { 100 /* The parent session must be the very first field in this struct */ 101 struct spdk_vhost_session vsession; 102 struct spdk_vhost_blk_dev *bvdev; 103 struct spdk_poller *requestq_poller; 104 struct spdk_io_channel *io_channel; 105 struct spdk_poller *stop_poller; 106 }; 107 108 /* forward declaration */ 109 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 110 111 static int 112 process_blk_request(struct spdk_vhost_blk_task *task, 113 struct spdk_vhost_blk_session *bvsession, 114 struct spdk_vhost_virtqueue *vq); 115 116 static void 117 blk_task_finish(struct spdk_vhost_blk_task *task) 118 { 119 assert(task->bvsession->vsession.task_cnt > 0); 120 task->bvsession->vsession.task_cnt--; 121 task->used = false; 122 } 123 124 static void 125 blk_task_init(struct spdk_vhost_blk_task *task) 126 { 127 task->used = true; 128 task->iovcnt = SPDK_COUNTOF(task->iovs); 129 task->status = NULL; 130 task->used_len = 0; 131 } 132 133 static void 134 blk_task_enqueue(struct spdk_vhost_blk_task *task) 135 { 136 if (task->vq->packed.packed_ring) { 137 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq, 138 task->num_descs, 139 task->buffer_id, task->used_len); 140 } else { 141 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, 142 task->req_idx, task->used_len); 143 } 144 } 145 146 static void 147 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 148 { 149 if (task->status) { 150 *task->status = status; 151 } 152 153 blk_task_enqueue(task); 154 blk_task_finish(task); 155 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 156 } 157 158 /* 159 * Process task's descriptor chain and setup data related fields. 160 * Return 161 * total size of suplied buffers 162 * 163 * FIXME: Make this function return to rd_cnt and wr_cnt 164 */ 165 static int 166 blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession, 167 struct spdk_vhost_virtqueue *vq, 168 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 169 { 170 struct spdk_vhost_session *vsession = &bvsession->vsession; 171 struct spdk_vhost_dev *vdev = vsession->vdev; 172 struct vring_desc *desc, *desc_table; 173 uint16_t out_cnt = 0, cnt = 0; 174 uint32_t desc_table_size, len = 0; 175 uint32_t desc_handled_cnt; 176 int rc; 177 178 rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size); 179 if (rc != 0) { 180 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 181 return -1; 182 } 183 184 desc_handled_cnt = 0; 185 while (1) { 186 /* 187 * Maximum cnt reached? 188 * Should not happen if request is well formatted, otherwise this is a BUG. 189 */ 190 if (spdk_unlikely(cnt == *iovs_cnt)) { 191 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 192 vsession->name, req_idx); 193 return -1; 194 } 195 196 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) { 197 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 198 vsession->name, req_idx, cnt); 199 return -1; 200 } 201 202 len += desc->len; 203 204 out_cnt += vhost_vring_desc_is_wr(desc); 205 206 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 207 if (rc != 0) { 208 SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 209 vsession->name, req_idx); 210 return -1; 211 } else if (desc == NULL) { 212 break; 213 } 214 215 desc_handled_cnt++; 216 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { 217 /* Break a cycle and report an error, if any. */ 218 SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n", 219 vsession->name, desc_table_size, desc_handled_cnt); 220 return -1; 221 } 222 } 223 224 /* 225 * There must be least two descriptors. 226 * First contain request so it must be readable. 227 * Last descriptor contain buffer for response so it must be writable. 228 */ 229 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 230 return -1; 231 } 232 233 *length = len; 234 *iovs_cnt = cnt; 235 return 0; 236 } 237 238 static int 239 blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession, 240 struct spdk_vhost_virtqueue *vq, 241 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 242 { 243 struct spdk_vhost_session *vsession = &bvsession->vsession; 244 struct spdk_vhost_dev *vdev = vsession->vdev; 245 struct vring_packed_desc *desc = NULL, *desc_table; 246 uint16_t out_cnt = 0, cnt = 0; 247 uint32_t desc_table_size, len = 0; 248 int rc = 0; 249 250 rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc, 251 &desc_table, &desc_table_size); 252 if (spdk_unlikely(rc != 0)) { 253 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 254 return rc; 255 } 256 257 if (desc_table != NULL) { 258 req_idx = 0; 259 } 260 261 while (1) { 262 /* 263 * Maximum cnt reached? 264 * Should not happen if request is well formatted, otherwise this is a BUG. 265 */ 266 if (spdk_unlikely(cnt == *iovs_cnt)) { 267 SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 268 vsession->name, req_idx); 269 return -EINVAL; 270 } 271 272 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) { 273 SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 274 vsession->name, req_idx, cnt); 275 return -EINVAL; 276 } 277 278 len += desc->len; 279 out_cnt += vhost_vring_packed_desc_is_wr(desc); 280 281 /* desc is NULL means we reach the last desc of this request */ 282 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size); 283 if (desc == NULL) { 284 break; 285 } 286 } 287 288 /* 289 * There must be least two descriptors. 290 * First contain request so it must be readable. 291 * Last descriptor contain buffer for response so it must be writable. 292 */ 293 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 294 return -EINVAL; 295 } 296 297 *length = len; 298 *iovs_cnt = cnt; 299 300 return 0; 301 } 302 303 static void 304 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 305 { 306 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 307 308 blk_task_enqueue(task); 309 310 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 311 task->req_idx, success ? "OK" : "FAIL"); 312 blk_task_finish(task); 313 } 314 315 static void 316 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 317 { 318 struct spdk_vhost_blk_task *task = cb_arg; 319 320 spdk_bdev_free_io(bdev_io); 321 blk_request_finish(success, task); 322 } 323 324 static void 325 blk_request_resubmit(void *arg) 326 { 327 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 328 int rc = 0; 329 330 blk_task_init(task); 331 332 rc = process_blk_request(task, task->bvsession, task->vq); 333 if (rc == 0) { 334 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 335 } else { 336 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 337 } 338 } 339 340 static inline void 341 blk_request_queue_io(struct spdk_vhost_blk_task *task) 342 { 343 int rc; 344 struct spdk_vhost_blk_session *bvsession = task->bvsession; 345 struct spdk_bdev *bdev = bvsession->bvdev->bdev; 346 347 task->bdev_io_wait.bdev = bdev; 348 task->bdev_io_wait.cb_fn = blk_request_resubmit; 349 task->bdev_io_wait.cb_arg = task; 350 351 rc = spdk_bdev_queue_io_wait(bdev, bvsession->io_channel, &task->bdev_io_wait); 352 if (rc != 0) { 353 SPDK_ERRLOG("%s: failed to queue I/O, rc=%d\n", bvsession->vsession.name, rc); 354 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 355 } 356 } 357 358 static int 359 process_blk_request(struct spdk_vhost_blk_task *task, 360 struct spdk_vhost_blk_session *bvsession, 361 struct spdk_vhost_virtqueue *vq) 362 { 363 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 364 const struct virtio_blk_outhdr *req; 365 struct virtio_blk_discard_write_zeroes *desc; 366 struct iovec *iov; 367 uint32_t type; 368 uint32_t payload_len; 369 uint64_t flush_bytes; 370 int rc; 371 372 if (vq->packed.packed_ring) { 373 rc = blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 374 &payload_len); 375 } else { 376 rc = blk_iovs_split_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 377 &payload_len); 378 } 379 380 if (rc) { 381 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 382 /* Only READ and WRITE are supported for now. */ 383 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 384 return -1; 385 } 386 387 iov = &task->iovs[0]; 388 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 389 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 390 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 391 iov->iov_len, sizeof(*req), task->req_idx); 392 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 393 return -1; 394 } 395 396 req = iov->iov_base; 397 398 iov = &task->iovs[task->iovcnt - 1]; 399 if (spdk_unlikely(iov->iov_len != 1)) { 400 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 401 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 402 iov->iov_len, 1, task->req_idx); 403 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 404 return -1; 405 } 406 407 task->status = iov->iov_base; 408 payload_len -= sizeof(*req) + sizeof(*task->status); 409 task->iovcnt -= 2; 410 411 type = req->type; 412 #ifdef VIRTIO_BLK_T_BARRIER 413 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 414 type &= ~VIRTIO_BLK_T_BARRIER; 415 #endif 416 417 switch (type) { 418 case VIRTIO_BLK_T_IN: 419 case VIRTIO_BLK_T_OUT: 420 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { 421 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 422 type ? "WRITE" : "READ", task->req_idx); 423 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 424 return -1; 425 } 426 427 if (type == VIRTIO_BLK_T_IN) { 428 task->used_len = payload_len + sizeof(*task->status); 429 rc = spdk_bdev_readv(bvdev->bdev_desc, bvsession->io_channel, 430 &task->iovs[1], task->iovcnt, req->sector * 512, 431 payload_len, blk_request_complete_cb, task); 432 } else if (!bvdev->readonly) { 433 task->used_len = sizeof(*task->status); 434 rc = spdk_bdev_writev(bvdev->bdev_desc, bvsession->io_channel, 435 &task->iovs[1], task->iovcnt, req->sector * 512, 436 payload_len, blk_request_complete_cb, task); 437 } else { 438 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 439 rc = -1; 440 } 441 442 if (rc) { 443 if (rc == -ENOMEM) { 444 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 445 blk_request_queue_io(task); 446 } else { 447 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 448 return -1; 449 } 450 } 451 break; 452 case VIRTIO_BLK_T_DISCARD: 453 desc = task->iovs[1].iov_base; 454 if (payload_len != sizeof(*desc)) { 455 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 456 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 457 return -1; 458 } 459 460 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvsession->io_channel, 461 desc->sector * 512, desc->num_sectors * 512, 462 blk_request_complete_cb, task); 463 if (rc) { 464 if (rc == -ENOMEM) { 465 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 466 blk_request_queue_io(task); 467 } else { 468 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 469 return -1; 470 } 471 } 472 break; 473 case VIRTIO_BLK_T_WRITE_ZEROES: 474 desc = task->iovs[1].iov_base; 475 if (payload_len != sizeof(*desc)) { 476 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 477 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 478 return -1; 479 } 480 481 /* Zeroed and Unmap the range, SPDK doen't support it. */ 482 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 483 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 484 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 485 return -1; 486 } 487 488 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvsession->io_channel, 489 desc->sector * 512, desc->num_sectors * 512, 490 blk_request_complete_cb, task); 491 if (rc) { 492 if (rc == -ENOMEM) { 493 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 494 blk_request_queue_io(task); 495 } else { 496 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 497 return -1; 498 } 499 } 500 break; 501 case VIRTIO_BLK_T_FLUSH: 502 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev); 503 if (req->sector != 0) { 504 SPDK_NOTICELOG("sector must be zero for flush command\n"); 505 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 506 return -1; 507 } 508 rc = spdk_bdev_flush(bvdev->bdev_desc, bvsession->io_channel, 509 0, flush_bytes, 510 blk_request_complete_cb, task); 511 if (rc) { 512 if (rc == -ENOMEM) { 513 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 514 blk_request_queue_io(task); 515 } else { 516 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 517 return -1; 518 } 519 } 520 break; 521 case VIRTIO_BLK_T_GET_ID: 522 if (!task->iovcnt || !payload_len) { 523 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 524 return -1; 525 } 526 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 527 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 528 task->used_len, ' '); 529 blk_request_finish(true, task); 530 break; 531 default: 532 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 533 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 534 return -1; 535 } 536 537 return 0; 538 } 539 540 static void 541 process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx) 542 { 543 struct spdk_vhost_blk_task *task; 544 uint16_t task_idx = req_idx, num_descs; 545 546 if (vq->packed.packed_ring) { 547 /* Packed ring used the buffer_id as the task_idx to get task struct. 548 * In kernel driver, it uses the vq->free_head to set the buffer_id so the value 549 * must be in the range of 0 ~ vring.size. The free_head value must be unique 550 * in the outstanding requests. 551 * We can't use the req_idx as the task_idx because the desc can be reused in 552 * the next phase even when it's not completed in the previous phase. For example, 553 * At phase 0, last_used_idx was 2 and desc0 was not completed.Then after moving 554 * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used 555 * as task_idx because we will know task[0]->used is true at phase 1. 556 * The split queue is quite different, the desc would insert into the free list when 557 * device completes the request, the driver gets the desc from the free list which 558 * ensures the req_idx is unique in the outstanding requests. 559 */ 560 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 561 } 562 563 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 564 if (spdk_unlikely(task->used)) { 565 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 566 task->bvsession->vsession.name, task_idx); 567 task->used_len = 0; 568 blk_task_enqueue(task); 569 return; 570 } 571 572 if (vq->packed.packed_ring) { 573 task->req_idx = req_idx; 574 task->num_descs = num_descs; 575 task->buffer_id = task_idx; 576 } 577 578 task->bvsession->vsession.task_cnt++; 579 580 blk_task_init(task); 581 582 if (process_blk_request(task, task->bvsession, vq) == 0) { 583 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 584 task_idx); 585 } else { 586 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx); 587 } 588 } 589 590 static void 591 submit_inflight_desc(struct spdk_vhost_blk_session *bvsession, 592 struct spdk_vhost_virtqueue *vq) 593 { 594 struct spdk_vhost_session *vsession = &bvsession->vsession; 595 spdk_vhost_resubmit_info *resubmit = vq->vring_inflight.resubmit_inflight; 596 spdk_vhost_resubmit_desc *resubmit_list; 597 uint16_t req_idx; 598 599 if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL)) { 600 return; 601 } 602 603 resubmit_list = resubmit->resubmit_list; 604 while (resubmit->resubmit_num-- > 0) { 605 req_idx = resubmit_list[resubmit->resubmit_num].index; 606 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Start processing request idx %"PRIu16"======\n", 607 req_idx); 608 609 if (spdk_unlikely(req_idx >= vq->vring.size)) { 610 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 611 vsession->name, req_idx, vq->vring.size); 612 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 613 continue; 614 } 615 616 process_blk_task(vq, req_idx); 617 } 618 619 free(resubmit_list); 620 resubmit->resubmit_list = NULL; 621 } 622 623 static void 624 process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 625 { 626 struct spdk_vhost_session *vsession = &bvsession->vsession; 627 uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS]; 628 uint16_t reqs_cnt, i; 629 630 submit_inflight_desc(bvsession, vq); 631 632 reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 633 if (!reqs_cnt) { 634 return; 635 } 636 637 for (i = 0; i < reqs_cnt; i++) { 638 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 639 reqs[i]); 640 641 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 642 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 643 vsession->name, reqs[i], vq->vring.size); 644 vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 645 continue; 646 } 647 648 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]); 649 650 process_blk_task(vq, reqs[i]); 651 } 652 } 653 654 static void 655 process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 656 { 657 uint16_t i = 0; 658 659 while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS && 660 vhost_vq_packed_ring_is_avail(vq)) { 661 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 662 vq->last_avail_idx); 663 664 process_blk_task(vq, vq->last_avail_idx); 665 } 666 } 667 668 static int 669 vdev_worker(void *arg) 670 { 671 struct spdk_vhost_blk_session *bvsession = arg; 672 struct spdk_vhost_session *vsession = &bvsession->vsession; 673 674 uint16_t q_idx; 675 bool packed_ring; 676 677 /* In a session, every vq supports the same format */ 678 packed_ring = vsession->virtqueue[0].packed.packed_ring; 679 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 680 if (packed_ring) { 681 process_packed_vq(bvsession, &vsession->virtqueue[q_idx]); 682 } else { 683 process_vq(bvsession, &vsession->virtqueue[q_idx]); 684 } 685 } 686 687 vhost_session_used_signal(vsession); 688 689 return -1; 690 } 691 692 static void 693 no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 694 { 695 struct spdk_vhost_session *vsession = &bvsession->vsession; 696 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 697 uint32_t length; 698 uint16_t iovcnt, req_idx; 699 700 if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 701 return; 702 } 703 704 iovcnt = SPDK_COUNTOF(iovs); 705 if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) { 706 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 707 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 708 } 709 710 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 711 } 712 713 static void 714 no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 715 { 716 struct spdk_vhost_session *vsession = &bvsession->vsession; 717 struct spdk_vhost_blk_task *task; 718 uint32_t length; 719 uint16_t req_idx = vq->last_avail_idx; 720 uint16_t task_idx, num_descs; 721 722 if (!vhost_vq_packed_ring_is_avail(vq)) { 723 return; 724 } 725 726 task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs); 727 task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx]; 728 if (spdk_unlikely(task->used)) { 729 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 730 vsession->name, req_idx); 731 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 732 task->buffer_id, task->used_len); 733 return; 734 } 735 736 task->req_idx = req_idx; 737 task->num_descs = num_descs; 738 task->buffer_id = task_idx; 739 blk_task_init(task); 740 741 if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, 742 &length)) { 743 *(volatile uint8_t *)(task->iovs[task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR; 744 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 745 } 746 747 task->used = false; 748 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs, 749 task->buffer_id, task->used_len); 750 } 751 752 static int 753 no_bdev_vdev_worker(void *arg) 754 { 755 struct spdk_vhost_blk_session *bvsession = arg; 756 struct spdk_vhost_session *vsession = &bvsession->vsession; 757 uint16_t q_idx; 758 bool packed_ring; 759 760 /* In a session, every vq supports the same format */ 761 packed_ring = vsession->virtqueue[0].packed.packed_ring; 762 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 763 if (packed_ring) { 764 no_bdev_process_packed_vq(bvsession, &vsession->virtqueue[q_idx]); 765 } else { 766 no_bdev_process_vq(bvsession, &vsession->virtqueue[q_idx]); 767 } 768 } 769 770 vhost_session_used_signal(vsession); 771 772 if (vsession->task_cnt == 0 && bvsession->io_channel) { 773 spdk_put_io_channel(bvsession->io_channel); 774 bvsession->io_channel = NULL; 775 } 776 777 return -1; 778 } 779 780 static struct spdk_vhost_blk_session * 781 to_blk_session(struct spdk_vhost_session *vsession) 782 { 783 assert(vsession->vdev->backend == &vhost_blk_device_backend); 784 return (struct spdk_vhost_blk_session *)vsession; 785 } 786 787 static struct spdk_vhost_blk_dev * 788 to_blk_dev(struct spdk_vhost_dev *vdev) 789 { 790 if (vdev == NULL) { 791 return NULL; 792 } 793 794 if (vdev->backend != &vhost_blk_device_backend) { 795 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 796 return NULL; 797 } 798 799 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 800 } 801 802 struct spdk_bdev * 803 spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) 804 { 805 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 806 807 assert(bvdev != NULL); 808 return bvdev->bdev; 809 } 810 811 static void 812 vhost_dev_bdev_remove_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx) 813 { 814 815 /* All sessions have been notified, time to close the bdev */ 816 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 817 818 assert(bvdev != NULL); 819 spdk_bdev_close(bvdev->bdev_desc); 820 bvdev->bdev_desc = NULL; 821 bvdev->bdev = NULL; 822 } 823 824 static int 825 vhost_session_bdev_remove_cb(struct spdk_vhost_dev *vdev, 826 struct spdk_vhost_session *vsession, 827 void *ctx) 828 { 829 struct spdk_vhost_blk_session *bvsession; 830 831 bvsession = (struct spdk_vhost_blk_session *)vsession; 832 if (bvsession->requestq_poller) { 833 spdk_poller_unregister(&bvsession->requestq_poller); 834 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0); 835 } 836 837 return 0; 838 } 839 840 static void 841 bdev_remove_cb(void *remove_ctx) 842 { 843 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 844 845 SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n", 846 bvdev->vdev.name); 847 848 spdk_vhost_lock(); 849 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_remove_cb, 850 vhost_dev_bdev_remove_cpl_cb, NULL); 851 spdk_vhost_unlock(); 852 } 853 854 static void 855 free_task_pool(struct spdk_vhost_blk_session *bvsession) 856 { 857 struct spdk_vhost_session *vsession = &bvsession->vsession; 858 struct spdk_vhost_virtqueue *vq; 859 uint16_t i; 860 861 for (i = 0; i < vsession->max_queues; i++) { 862 vq = &vsession->virtqueue[i]; 863 if (vq->tasks == NULL) { 864 continue; 865 } 866 867 spdk_free(vq->tasks); 868 vq->tasks = NULL; 869 } 870 } 871 872 static int 873 alloc_task_pool(struct spdk_vhost_blk_session *bvsession) 874 { 875 struct spdk_vhost_session *vsession = &bvsession->vsession; 876 struct spdk_vhost_virtqueue *vq; 877 struct spdk_vhost_blk_task *task; 878 uint32_t task_cnt; 879 uint16_t i; 880 uint32_t j; 881 882 for (i = 0; i < vsession->max_queues; i++) { 883 vq = &vsession->virtqueue[i]; 884 if (vq->vring.desc == NULL) { 885 continue; 886 } 887 888 task_cnt = vq->vring.size; 889 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 890 /* sanity check */ 891 SPDK_ERRLOG("%s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 892 vsession->name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 893 free_task_pool(bvsession); 894 return -1; 895 } 896 vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 897 SPDK_CACHE_LINE_SIZE, NULL, 898 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 899 if (vq->tasks == NULL) { 900 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 901 vsession->name, task_cnt, i); 902 free_task_pool(bvsession); 903 return -1; 904 } 905 906 for (j = 0; j < task_cnt; j++) { 907 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 908 task->bvsession = bvsession; 909 task->req_idx = j; 910 task->vq = vq; 911 } 912 } 913 914 return 0; 915 } 916 917 static int 918 vhost_blk_start_cb(struct spdk_vhost_dev *vdev, 919 struct spdk_vhost_session *vsession, void *unused) 920 { 921 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 922 struct spdk_vhost_blk_dev *bvdev; 923 int i, rc = 0; 924 925 bvdev = to_blk_dev(vdev); 926 assert(bvdev != NULL); 927 bvsession->bvdev = bvdev; 928 929 /* validate all I/O queues are in a contiguous index range */ 930 for (i = 0; i < vsession->max_queues; i++) { 931 /* vring.desc and vring.desc_packed are in a union struct 932 * so q->vring.desc can replace q->vring.desc_packed. 933 */ 934 if (vsession->virtqueue[i].vring.desc == NULL) { 935 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i); 936 rc = -1; 937 goto out; 938 } 939 } 940 941 rc = alloc_task_pool(bvsession); 942 if (rc != 0) { 943 SPDK_ERRLOG("%s: failed to alloc task pool.\n", vsession->name); 944 goto out; 945 } 946 947 if (bvdev->bdev) { 948 bvsession->io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 949 if (!bvsession->io_channel) { 950 free_task_pool(bvsession); 951 SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name); 952 rc = -1; 953 goto out; 954 } 955 } 956 957 bvsession->requestq_poller = SPDK_POLLER_REGISTER(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 958 bvsession, 0); 959 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: started poller on lcore %d\n", 960 vsession->name, spdk_env_get_current_core()); 961 out: 962 vhost_session_start_done(vsession, rc); 963 return rc; 964 } 965 966 static int 967 vhost_blk_start(struct spdk_vhost_session *vsession) 968 { 969 return vhost_session_send_event(vsession, vhost_blk_start_cb, 970 3, "start session"); 971 } 972 973 static int 974 destroy_session_poller_cb(void *arg) 975 { 976 struct spdk_vhost_blk_session *bvsession = arg; 977 struct spdk_vhost_session *vsession = &bvsession->vsession; 978 int i; 979 980 if (vsession->task_cnt > 0) { 981 return -1; 982 } 983 984 if (spdk_vhost_trylock() != 0) { 985 return -1; 986 } 987 988 for (i = 0; i < vsession->max_queues; i++) { 989 vsession->virtqueue[i].next_event_time = 0; 990 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 991 } 992 993 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: stopping poller on lcore %d\n", 994 vsession->name, spdk_env_get_current_core()); 995 996 if (bvsession->io_channel) { 997 spdk_put_io_channel(bvsession->io_channel); 998 bvsession->io_channel = NULL; 999 } 1000 1001 free_task_pool(bvsession); 1002 spdk_poller_unregister(&bvsession->stop_poller); 1003 vhost_session_stop_done(vsession, 0); 1004 1005 spdk_vhost_unlock(); 1006 return -1; 1007 } 1008 1009 static int 1010 vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, 1011 struct spdk_vhost_session *vsession, void *unused) 1012 { 1013 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 1014 1015 spdk_poller_unregister(&bvsession->requestq_poller); 1016 bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb, 1017 bvsession, 1000); 1018 return 0; 1019 } 1020 1021 static int 1022 vhost_blk_stop(struct spdk_vhost_session *vsession) 1023 { 1024 return vhost_session_send_event(vsession, vhost_blk_stop_cb, 1025 3, "stop session"); 1026 } 1027 1028 static void 1029 vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1030 { 1031 struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); 1032 struct spdk_vhost_blk_dev *bvdev; 1033 1034 bvdev = to_blk_dev(vdev); 1035 assert(bvdev != NULL); 1036 spdk_json_write_named_object_begin(w, "block"); 1037 1038 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1039 1040 spdk_json_write_name(w, "bdev"); 1041 if (bdev) { 1042 spdk_json_write_string(w, spdk_bdev_get_name(bdev)); 1043 } else { 1044 spdk_json_write_null(w); 1045 } 1046 1047 spdk_json_write_object_end(w); 1048 } 1049 1050 static void 1051 vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 1052 { 1053 struct spdk_vhost_blk_dev *bvdev; 1054 1055 bvdev = to_blk_dev(vdev); 1056 assert(bvdev != NULL); 1057 if (!bvdev->bdev) { 1058 return; 1059 } 1060 1061 spdk_json_write_object_begin(w); 1062 spdk_json_write_named_string(w, "method", "vhost_create_blk_controller"); 1063 1064 spdk_json_write_named_object_begin(w, "params"); 1065 spdk_json_write_named_string(w, "ctrlr", vdev->name); 1066 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 1067 spdk_json_write_named_string(w, "cpumask", 1068 spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread))); 1069 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 1070 spdk_json_write_object_end(w); 1071 1072 spdk_json_write_object_end(w); 1073 } 1074 1075 static int vhost_blk_destroy(struct spdk_vhost_dev *dev); 1076 1077 static int 1078 vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 1079 uint32_t len) 1080 { 1081 struct virtio_blk_config blkcfg; 1082 struct spdk_vhost_blk_dev *bvdev; 1083 struct spdk_bdev *bdev; 1084 uint32_t blk_size; 1085 uint64_t blkcnt; 1086 1087 memset(&blkcfg, 0, sizeof(blkcfg)); 1088 bvdev = to_blk_dev(vdev); 1089 assert(bvdev != NULL); 1090 bdev = bvdev->bdev; 1091 if (bdev == NULL) { 1092 /* We can't just return -1 here as this GET_CONFIG message might 1093 * be caused by a QEMU VM reboot. Returning -1 will indicate an 1094 * error to QEMU, who might then decide to terminate itself. 1095 * We don't want that. A simple reboot shouldn't break the system. 1096 * 1097 * Presenting a block device with block size 0 and block count 0 1098 * doesn't cause any problems on QEMU side and the virtio-pci 1099 * device is even still available inside the VM, but there will 1100 * be no block device created for it - the kernel drivers will 1101 * silently reject it. 1102 */ 1103 blk_size = 0; 1104 blkcnt = 0; 1105 } else { 1106 blk_size = spdk_bdev_get_block_size(bdev); 1107 blkcnt = spdk_bdev_get_num_blocks(bdev); 1108 if (spdk_bdev_get_buf_align(bdev) > 1) { 1109 blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE; 1110 blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, BDEV_IO_NUM_CHILD_IOV - 2 - 1); 1111 } else { 1112 blkcfg.size_max = 131072; 1113 /* -2 for REQ and RESP and -1 for region boundary splitting */ 1114 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 1115 } 1116 } 1117 1118 blkcfg.blk_size = blk_size; 1119 /* minimum I/O size in blocks */ 1120 blkcfg.min_io_size = 1; 1121 /* expressed in 512 Bytes sectors */ 1122 blkcfg.capacity = (blkcnt * blk_size) / 512; 1123 /* QEMU can overwrite this value when started */ 1124 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 1125 1126 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1127 /* 16MiB, expressed in 512 Bytes */ 1128 blkcfg.max_discard_sectors = 32768; 1129 blkcfg.max_discard_seg = 1; 1130 blkcfg.discard_sector_alignment = blk_size / 512; 1131 } 1132 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1133 blkcfg.max_write_zeroes_sectors = 32768; 1134 blkcfg.max_write_zeroes_seg = 1; 1135 } 1136 1137 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 1138 1139 return 0; 1140 } 1141 1142 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 1143 .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session), 1144 .start_session = vhost_blk_start, 1145 .stop_session = vhost_blk_stop, 1146 .vhost_get_config = vhost_blk_get_config, 1147 .dump_info_json = vhost_blk_dump_info_json, 1148 .write_config_json = vhost_blk_write_config_json, 1149 .remove_device = vhost_blk_destroy, 1150 }; 1151 1152 int 1153 vhost_blk_controller_construct(void) 1154 { 1155 struct spdk_conf_section *sp; 1156 unsigned ctrlr_num; 1157 char *bdev_name; 1158 char *cpumask; 1159 char *name; 1160 bool readonly; 1161 bool packed_ring; 1162 1163 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 1164 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 1165 continue; 1166 } 1167 1168 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 1169 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 1170 spdk_conf_section_get_name(sp)); 1171 return -1; 1172 } 1173 1174 name = spdk_conf_section_get_val(sp, "Name"); 1175 if (name == NULL) { 1176 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 1177 return -1; 1178 } 1179 1180 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 1181 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 1182 packed_ring = spdk_conf_section_get_boolval(sp, "PackedRing", false); 1183 1184 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 1185 if (bdev_name == NULL) { 1186 continue; 1187 } 1188 1189 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, 1190 readonly, packed_ring) < 0) { 1191 return -1; 1192 } 1193 } 1194 1195 return 0; 1196 } 1197 1198 int 1199 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, 1200 bool readonly, bool packed_ring) 1201 { 1202 struct spdk_vhost_blk_dev *bvdev = NULL; 1203 struct spdk_vhost_dev *vdev; 1204 struct spdk_bdev *bdev; 1205 int ret = 0; 1206 1207 spdk_vhost_lock(); 1208 bdev = spdk_bdev_get_by_name(dev_name); 1209 if (bdev == NULL) { 1210 SPDK_ERRLOG("%s: bdev '%s' not found\n", 1211 name, dev_name); 1212 ret = -ENODEV; 1213 goto out; 1214 } 1215 1216 bvdev = calloc(1, sizeof(*bvdev)); 1217 if (bvdev == NULL) { 1218 ret = -ENOMEM; 1219 goto out; 1220 } 1221 1222 vdev = &bvdev->vdev; 1223 vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE; 1224 vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES; 1225 vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES; 1226 1227 vdev->virtio_features |= (uint64_t)packed_ring << VIRTIO_F_RING_PACKED; 1228 1229 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1230 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD); 1231 } 1232 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1233 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 1234 } 1235 if (readonly) { 1236 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO); 1237 } 1238 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { 1239 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH); 1240 } 1241 1242 ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); 1243 if (ret != 0) { 1244 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n", 1245 name, dev_name, ret); 1246 goto out; 1247 } 1248 1249 bvdev->bdev = bdev; 1250 bvdev->readonly = readonly; 1251 ret = vhost_dev_register(vdev, name, cpumask, &vhost_blk_device_backend); 1252 if (ret != 0) { 1253 spdk_bdev_close(bvdev->bdev_desc); 1254 goto out; 1255 } 1256 1257 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: using bdev '%s'\n", name, dev_name); 1258 out: 1259 if (ret != 0 && bvdev) { 1260 free(bvdev); 1261 } 1262 spdk_vhost_unlock(); 1263 return ret; 1264 } 1265 1266 static int 1267 vhost_blk_destroy(struct spdk_vhost_dev *vdev) 1268 { 1269 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 1270 int rc; 1271 1272 assert(bvdev != NULL); 1273 rc = vhost_dev_unregister(&bvdev->vdev); 1274 if (rc != 0) { 1275 return rc; 1276 } 1277 1278 if (bvdev->bdev_desc) { 1279 spdk_bdev_close(bvdev->bdev_desc); 1280 bvdev->bdev_desc = NULL; 1281 } 1282 bvdev->bdev = NULL; 1283 1284 free(bvdev); 1285 return 0; 1286 } 1287 1288 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 1289 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 1290