1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_blk.h> 35 36 #include "spdk/env.h" 37 #include "spdk/bdev.h" 38 #include "spdk/bdev_module.h" 39 #include "spdk/conf.h" 40 #include "spdk/thread.h" 41 #include "spdk/likely.h" 42 #include "spdk/string.h" 43 #include "spdk/util.h" 44 #include "spdk/vhost.h" 45 46 #include "vhost_internal.h" 47 48 /* Minimal set of features supported by every SPDK VHOST-BLK device */ 49 #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \ 50 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ 51 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ 52 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \ 53 (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 54 (1ULL << VIRTIO_BLK_F_MQ)) 55 56 /* Not supported features */ 57 #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \ 58 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \ 59 (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI)) 60 61 /* Vhost-blk support protocol features */ 62 #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \ 63 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) 64 65 struct spdk_vhost_blk_task { 66 struct spdk_bdev_io *bdev_io; 67 struct spdk_vhost_blk_session *bvsession; 68 struct spdk_vhost_virtqueue *vq; 69 70 volatile uint8_t *status; 71 72 uint16_t req_idx; 73 74 /* for io wait */ 75 struct spdk_bdev_io_wait_entry bdev_io_wait; 76 77 /* If set, the task is currently used for I/O processing. */ 78 bool used; 79 80 /** Number of bytes that were written. */ 81 uint32_t used_len; 82 uint16_t iovcnt; 83 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 84 }; 85 86 struct spdk_vhost_blk_dev { 87 struct spdk_vhost_dev vdev; 88 struct spdk_bdev *bdev; 89 struct spdk_bdev_desc *bdev_desc; 90 bool readonly; 91 }; 92 93 struct spdk_vhost_blk_session { 94 /* The parent session must be the very first field in this struct */ 95 struct spdk_vhost_session vsession; 96 struct spdk_vhost_blk_dev *bvdev; 97 struct spdk_poller *requestq_poller; 98 struct spdk_io_channel *io_channel; 99 struct spdk_poller *stop_poller; 100 }; 101 102 /* forward declaration */ 103 static const struct spdk_vhost_dev_backend vhost_blk_device_backend; 104 105 static int 106 process_blk_request(struct spdk_vhost_blk_task *task, 107 struct spdk_vhost_blk_session *bvsession, 108 struct spdk_vhost_virtqueue *vq); 109 110 static void 111 blk_task_finish(struct spdk_vhost_blk_task *task) 112 { 113 assert(task->bvsession->vsession.task_cnt > 0); 114 task->bvsession->vsession.task_cnt--; 115 task->used = false; 116 } 117 118 static void 119 invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) 120 { 121 if (task->status) { 122 *task->status = status; 123 } 124 125 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, task->req_idx, 126 task->used_len); 127 blk_task_finish(task); 128 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); 129 } 130 131 /* 132 * Process task's descriptor chain and setup data related fields. 133 * Return 134 * total size of suplied buffers 135 * 136 * FIXME: Make this function return to rd_cnt and wr_cnt 137 */ 138 static int 139 blk_iovs_setup(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq, 140 uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) 141 { 142 struct spdk_vhost_session *vsession = &bvsession->vsession; 143 struct spdk_vhost_dev *vdev = vsession->vdev; 144 struct vring_desc *desc, *desc_table; 145 uint16_t out_cnt = 0, cnt = 0; 146 uint32_t desc_table_size, len = 0; 147 uint32_t desc_handled_cnt; 148 int rc; 149 150 rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size); 151 if (rc != 0) { 152 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); 153 return -1; 154 } 155 156 desc_handled_cnt = 0; 157 while (1) { 158 /* 159 * Maximum cnt reached? 160 * Should not happen if request is well formatted, otherwise this is a BUG. 161 */ 162 if (spdk_unlikely(cnt == *iovs_cnt)) { 163 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n", 164 vsession->name, req_idx); 165 return -1; 166 } 167 168 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) { 169 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", 170 vsession->name, req_idx, cnt); 171 return -1; 172 } 173 174 len += desc->len; 175 176 out_cnt += vhost_vring_desc_is_wr(desc); 177 178 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); 179 if (rc != 0) { 180 SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n", 181 vsession->name, req_idx); 182 return -1; 183 } else if (desc == NULL) { 184 break; 185 } 186 187 desc_handled_cnt++; 188 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) { 189 /* Break a cycle and report an error, if any. */ 190 SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n", 191 vsession->name, desc_table_size, desc_handled_cnt); 192 return -1; 193 } 194 } 195 196 /* 197 * There must be least two descriptors. 198 * First contain request so it must be readable. 199 * Last descriptor contain buffer for response so it must be writable. 200 */ 201 if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { 202 return -1; 203 } 204 205 *length = len; 206 *iovs_cnt = cnt; 207 return 0; 208 } 209 210 static void 211 blk_request_finish(bool success, struct spdk_vhost_blk_task *task) 212 { 213 *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; 214 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq, task->req_idx, 215 task->used_len); 216 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, 217 task->req_idx, success ? "OK" : "FAIL"); 218 blk_task_finish(task); 219 } 220 221 static void 222 blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 223 { 224 struct spdk_vhost_blk_task *task = cb_arg; 225 226 spdk_bdev_free_io(bdev_io); 227 blk_request_finish(success, task); 228 } 229 230 static void 231 blk_request_resubmit(void *arg) 232 { 233 struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; 234 int rc = 0; 235 236 rc = process_blk_request(task, task->bvsession, task->vq); 237 if (rc == 0) { 238 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); 239 } else { 240 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); 241 } 242 } 243 244 static inline void 245 blk_request_queue_io(struct spdk_vhost_blk_task *task) 246 { 247 int rc; 248 struct spdk_vhost_blk_session *bvsession = task->bvsession; 249 struct spdk_bdev *bdev = bvsession->bvdev->bdev; 250 251 task->bdev_io_wait.bdev = bdev; 252 task->bdev_io_wait.cb_fn = blk_request_resubmit; 253 task->bdev_io_wait.cb_arg = task; 254 255 rc = spdk_bdev_queue_io_wait(bdev, bvsession->io_channel, &task->bdev_io_wait); 256 if (rc != 0) { 257 SPDK_ERRLOG("%s: failed to queue I/O, rc=%d\n", bvsession->vsession.name, rc); 258 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 259 } 260 } 261 262 static int 263 process_blk_request(struct spdk_vhost_blk_task *task, 264 struct spdk_vhost_blk_session *bvsession, 265 struct spdk_vhost_virtqueue *vq) 266 { 267 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 268 const struct virtio_blk_outhdr *req; 269 struct virtio_blk_discard_write_zeroes *desc; 270 struct iovec *iov; 271 uint32_t type; 272 uint32_t payload_len; 273 uint64_t flush_bytes; 274 int rc; 275 276 if (blk_iovs_setup(bvsession, vq, task->req_idx, task->iovs, &task->iovcnt, &payload_len)) { 277 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); 278 /* Only READ and WRITE are supported for now. */ 279 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 280 return -1; 281 } 282 283 iov = &task->iovs[0]; 284 if (spdk_unlikely(iov->iov_len != sizeof(*req))) { 285 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 286 "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", 287 iov->iov_len, sizeof(*req), task->req_idx); 288 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 289 return -1; 290 } 291 292 req = iov->iov_base; 293 294 iov = &task->iovs[task->iovcnt - 1]; 295 if (spdk_unlikely(iov->iov_len != 1)) { 296 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, 297 "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", 298 iov->iov_len, 1, task->req_idx); 299 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 300 return -1; 301 } 302 303 task->status = iov->iov_base; 304 payload_len -= sizeof(*req) + sizeof(*task->status); 305 task->iovcnt -= 2; 306 307 type = req->type; 308 #ifdef VIRTIO_BLK_T_BARRIER 309 /* Don't care about barier for now (as QEMU's virtio-blk do). */ 310 type &= ~VIRTIO_BLK_T_BARRIER; 311 #endif 312 313 switch (type) { 314 case VIRTIO_BLK_T_IN: 315 case VIRTIO_BLK_T_OUT: 316 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) { 317 SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", 318 type ? "WRITE" : "READ", task->req_idx); 319 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 320 return -1; 321 } 322 323 if (type == VIRTIO_BLK_T_IN) { 324 task->used_len = payload_len + sizeof(*task->status); 325 rc = spdk_bdev_readv(bvdev->bdev_desc, bvsession->io_channel, 326 &task->iovs[1], task->iovcnt, req->sector * 512, 327 payload_len, blk_request_complete_cb, task); 328 } else if (!bvdev->readonly) { 329 task->used_len = sizeof(*task->status); 330 rc = spdk_bdev_writev(bvdev->bdev_desc, bvsession->io_channel, 331 &task->iovs[1], task->iovcnt, req->sector * 512, 332 payload_len, blk_request_complete_cb, task); 333 } else { 334 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); 335 rc = -1; 336 } 337 338 if (rc) { 339 if (rc == -ENOMEM) { 340 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 341 blk_request_queue_io(task); 342 } else { 343 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 344 return -1; 345 } 346 } 347 break; 348 case VIRTIO_BLK_T_DISCARD: 349 desc = task->iovs[1].iov_base; 350 if (payload_len != sizeof(*desc)) { 351 SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len); 352 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 353 return -1; 354 } 355 356 rc = spdk_bdev_unmap(bvdev->bdev_desc, bvsession->io_channel, 357 desc->sector * 512, desc->num_sectors * 512, 358 blk_request_complete_cb, task); 359 if (rc) { 360 if (rc == -ENOMEM) { 361 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 362 blk_request_queue_io(task); 363 } else { 364 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 365 return -1; 366 } 367 } 368 break; 369 case VIRTIO_BLK_T_WRITE_ZEROES: 370 desc = task->iovs[1].iov_base; 371 if (payload_len != sizeof(*desc)) { 372 SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len); 373 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 374 return -1; 375 } 376 377 /* Zeroed and Unmap the range, SPDK doen't support it. */ 378 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 379 SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n"); 380 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 381 return -1; 382 } 383 384 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvsession->io_channel, 385 desc->sector * 512, desc->num_sectors * 512, 386 blk_request_complete_cb, task); 387 if (rc) { 388 if (rc == -ENOMEM) { 389 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 390 blk_request_queue_io(task); 391 } else { 392 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 393 return -1; 394 } 395 } 396 break; 397 case VIRTIO_BLK_T_FLUSH: 398 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev); 399 if (req->sector != 0) { 400 SPDK_NOTICELOG("sector must be zero for flush command\n"); 401 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 402 return -1; 403 } 404 rc = spdk_bdev_flush(bvdev->bdev_desc, bvsession->io_channel, 405 0, flush_bytes, 406 blk_request_complete_cb, task); 407 if (rc) { 408 if (rc == -ENOMEM) { 409 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); 410 blk_request_queue_io(task); 411 } else { 412 invalid_blk_request(task, VIRTIO_BLK_S_IOERR); 413 return -1; 414 } 415 } 416 break; 417 case VIRTIO_BLK_T_GET_ID: 418 if (!task->iovcnt || !payload_len) { 419 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 420 return -1; 421 } 422 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); 423 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), 424 task->used_len, ' '); 425 blk_request_finish(true, task); 426 break; 427 default: 428 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); 429 invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); 430 return -1; 431 } 432 433 return 0; 434 } 435 436 static void 437 submit_inflight_desc(struct spdk_vhost_blk_session *bvsession, 438 struct spdk_vhost_virtqueue *vq) 439 { 440 struct spdk_vhost_blk_dev *bvdev = bvsession->bvdev; 441 struct spdk_vhost_blk_task *task; 442 struct spdk_vhost_session *vsession = &bvsession->vsession; 443 spdk_vhost_resubmit_info *resubmit = vq->vring_inflight.resubmit_inflight; 444 spdk_vhost_resubmit_desc *resubmit_list; 445 int rc; 446 uint16_t req_idx; 447 448 if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL)) { 449 return; 450 } 451 452 resubmit_list = resubmit->resubmit_list; 453 while (resubmit->resubmit_num-- > 0) { 454 req_idx = resubmit_list[resubmit->resubmit_num].index; 455 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Start processing request idx %"PRIu16"======\n", 456 req_idx); 457 458 if (spdk_unlikely(req_idx >= vq->vring.size)) { 459 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 460 bvdev->vdev.name, req_idx, vq->vring.size); 461 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 462 continue; 463 } 464 465 task = &((struct spdk_vhost_blk_task *)vq->tasks)[req_idx]; 466 if (spdk_unlikely(task->used)) { 467 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 468 bvdev->vdev.name, req_idx); 469 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 470 continue; 471 } 472 473 vsession->task_cnt++; 474 475 task->used = true; 476 task->iovcnt = SPDK_COUNTOF(task->iovs); 477 task->status = NULL; 478 task->used_len = 0; 479 480 rc = process_blk_request(task, bvsession, vq); 481 if (rc == 0) { 482 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 483 req_idx); 484 } else { 485 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, 486 req_idx); 487 } 488 } 489 490 free(resubmit_list); 491 resubmit->resubmit_list = NULL; 492 } 493 494 static void 495 process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 496 { 497 struct spdk_vhost_blk_task *task; 498 struct spdk_vhost_session *vsession = &bvsession->vsession; 499 int rc; 500 uint16_t reqs[32]; 501 uint16_t reqs_cnt, i; 502 uint16_t vq_idx = vq->vring_idx; 503 504 submit_inflight_desc(bvsession, vq); 505 506 reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); 507 if (!reqs_cnt) { 508 return; 509 } 510 511 for (i = 0; i < reqs_cnt; i++) { 512 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", 513 reqs[i]); 514 515 if (spdk_unlikely(reqs[i] >= vq->vring.size)) { 516 SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", 517 vsession->name, reqs[i], vq->vring.size); 518 vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 519 continue; 520 } 521 522 rte_vhost_set_inflight_desc_split(vsession->vid, vq_idx, reqs[i]); 523 task = &((struct spdk_vhost_blk_task *)vq->tasks)[reqs[i]]; 524 if (spdk_unlikely(task->used)) { 525 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", 526 vsession->name, reqs[i]); 527 vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0); 528 continue; 529 } 530 531 vsession->task_cnt++; 532 533 task->used = true; 534 task->iovcnt = SPDK_COUNTOF(task->iovs); 535 task->status = NULL; 536 task->used_len = 0; 537 538 rc = process_blk_request(task, bvsession, vq); 539 if (rc == 0) { 540 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, 541 reqs[i]); 542 } else { 543 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, reqs[i]); 544 } 545 } 546 } 547 548 static int 549 vdev_worker(void *arg) 550 { 551 struct spdk_vhost_blk_session *bvsession = arg; 552 struct spdk_vhost_session *vsession = &bvsession->vsession; 553 554 uint16_t q_idx; 555 556 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 557 process_vq(bvsession, &vsession->virtqueue[q_idx]); 558 } 559 560 vhost_session_used_signal(vsession); 561 562 return -1; 563 } 564 565 static void 566 no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq) 567 { 568 struct spdk_vhost_session *vsession = &bvsession->vsession; 569 struct iovec iovs[SPDK_VHOST_IOVS_MAX]; 570 uint32_t length; 571 uint16_t iovcnt, req_idx; 572 573 if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { 574 return; 575 } 576 577 iovcnt = SPDK_COUNTOF(iovs); 578 if (blk_iovs_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) { 579 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; 580 SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); 581 } 582 583 vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0); 584 } 585 586 static int 587 no_bdev_vdev_worker(void *arg) 588 { 589 struct spdk_vhost_blk_session *bvsession = arg; 590 struct spdk_vhost_session *vsession = &bvsession->vsession; 591 uint16_t q_idx; 592 593 for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) { 594 no_bdev_process_vq(bvsession, &vsession->virtqueue[q_idx]); 595 } 596 597 vhost_session_used_signal(vsession); 598 599 if (vsession->task_cnt == 0 && bvsession->io_channel) { 600 spdk_put_io_channel(bvsession->io_channel); 601 bvsession->io_channel = NULL; 602 } 603 604 return -1; 605 } 606 607 static struct spdk_vhost_blk_session * 608 to_blk_session(struct spdk_vhost_session *vsession) 609 { 610 assert(vsession->vdev->backend == &vhost_blk_device_backend); 611 return (struct spdk_vhost_blk_session *)vsession; 612 } 613 614 static struct spdk_vhost_blk_dev * 615 to_blk_dev(struct spdk_vhost_dev *vdev) 616 { 617 if (vdev == NULL) { 618 return NULL; 619 } 620 621 if (vdev->backend != &vhost_blk_device_backend) { 622 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); 623 return NULL; 624 } 625 626 return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); 627 } 628 629 struct spdk_bdev * 630 spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) 631 { 632 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 633 634 assert(bvdev != NULL); 635 return bvdev->bdev; 636 } 637 638 static void 639 vhost_dev_bdev_remove_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx) 640 { 641 642 /* All sessions have been notified, time to close the bdev */ 643 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 644 645 assert(bvdev != NULL); 646 spdk_bdev_close(bvdev->bdev_desc); 647 bvdev->bdev_desc = NULL; 648 bvdev->bdev = NULL; 649 } 650 651 static int 652 vhost_session_bdev_remove_cb(struct spdk_vhost_dev *vdev, 653 struct spdk_vhost_session *vsession, 654 void *ctx) 655 { 656 struct spdk_vhost_blk_session *bvsession; 657 658 bvsession = (struct spdk_vhost_blk_session *)vsession; 659 if (bvsession->requestq_poller) { 660 spdk_poller_unregister(&bvsession->requestq_poller); 661 bvsession->requestq_poller = spdk_poller_register(no_bdev_vdev_worker, bvsession, 0); 662 } 663 664 return 0; 665 } 666 667 static void 668 bdev_remove_cb(void *remove_ctx) 669 { 670 struct spdk_vhost_blk_dev *bvdev = remove_ctx; 671 672 SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n", 673 bvdev->vdev.name); 674 675 spdk_vhost_lock(); 676 vhost_dev_foreach_session(&bvdev->vdev, vhost_session_bdev_remove_cb, 677 vhost_dev_bdev_remove_cpl_cb, NULL); 678 spdk_vhost_unlock(); 679 } 680 681 static void 682 free_task_pool(struct spdk_vhost_blk_session *bvsession) 683 { 684 struct spdk_vhost_session *vsession = &bvsession->vsession; 685 struct spdk_vhost_virtqueue *vq; 686 uint16_t i; 687 688 for (i = 0; i < vsession->max_queues; i++) { 689 vq = &vsession->virtqueue[i]; 690 if (vq->tasks == NULL) { 691 continue; 692 } 693 694 spdk_free(vq->tasks); 695 vq->tasks = NULL; 696 } 697 } 698 699 static int 700 alloc_task_pool(struct spdk_vhost_blk_session *bvsession) 701 { 702 struct spdk_vhost_session *vsession = &bvsession->vsession; 703 struct spdk_vhost_virtqueue *vq; 704 struct spdk_vhost_blk_task *task; 705 uint32_t task_cnt; 706 uint16_t i; 707 uint32_t j; 708 709 for (i = 0; i < vsession->max_queues; i++) { 710 vq = &vsession->virtqueue[i]; 711 if (vq->vring.desc == NULL) { 712 continue; 713 } 714 715 task_cnt = vq->vring.size; 716 if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { 717 /* sanity check */ 718 SPDK_ERRLOG("%s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", 719 vsession->name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); 720 free_task_pool(bvsession); 721 return -1; 722 } 723 vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, 724 SPDK_CACHE_LINE_SIZE, NULL, 725 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 726 if (vq->tasks == NULL) { 727 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", 728 vsession->name, task_cnt, i); 729 free_task_pool(bvsession); 730 return -1; 731 } 732 733 for (j = 0; j < task_cnt; j++) { 734 task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; 735 task->bvsession = bvsession; 736 task->req_idx = j; 737 task->vq = vq; 738 } 739 } 740 741 return 0; 742 } 743 744 static int 745 vhost_blk_start_cb(struct spdk_vhost_dev *vdev, 746 struct spdk_vhost_session *vsession, void *unused) 747 { 748 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 749 struct spdk_vhost_blk_dev *bvdev; 750 int i, rc = 0; 751 752 bvdev = to_blk_dev(vdev); 753 assert(bvdev != NULL); 754 bvsession->bvdev = bvdev; 755 756 /* validate all I/O queues are in a contiguous index range */ 757 for (i = 0; i < vsession->max_queues; i++) { 758 if (vsession->virtqueue[i].vring.desc == NULL) { 759 SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i); 760 rc = -1; 761 goto out; 762 } 763 } 764 765 rc = alloc_task_pool(bvsession); 766 if (rc != 0) { 767 SPDK_ERRLOG("%s: failed to alloc task pool.\n", vsession->name); 768 goto out; 769 } 770 771 if (bvdev->bdev) { 772 bvsession->io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); 773 if (!bvsession->io_channel) { 774 free_task_pool(bvsession); 775 SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name); 776 rc = -1; 777 goto out; 778 } 779 } 780 781 bvsession->requestq_poller = spdk_poller_register(bvdev->bdev ? vdev_worker : no_bdev_vdev_worker, 782 bvsession, 0); 783 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: started poller on lcore %d\n", 784 vsession->name, spdk_env_get_current_core()); 785 out: 786 vhost_session_start_done(vsession, rc); 787 return rc; 788 } 789 790 static int 791 vhost_blk_start(struct spdk_vhost_session *vsession) 792 { 793 struct vhost_poll_group *pg; 794 795 pg = vhost_get_poll_group(&vsession->vdev->cpumask); 796 return vhost_session_send_event(pg, vsession, vhost_blk_start_cb, 797 3, "start session"); 798 } 799 800 static int 801 destroy_session_poller_cb(void *arg) 802 { 803 struct spdk_vhost_blk_session *bvsession = arg; 804 struct spdk_vhost_session *vsession = &bvsession->vsession; 805 int i; 806 807 if (vsession->task_cnt > 0) { 808 return -1; 809 } 810 811 if (spdk_vhost_trylock() != 0) { 812 return -1; 813 } 814 815 for (i = 0; i < vsession->max_queues; i++) { 816 vsession->virtqueue[i].next_event_time = 0; 817 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]); 818 } 819 820 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: stopping poller on lcore %d\n", 821 vsession->name, spdk_env_get_current_core()); 822 823 if (bvsession->io_channel) { 824 spdk_put_io_channel(bvsession->io_channel); 825 bvsession->io_channel = NULL; 826 } 827 828 free_task_pool(bvsession); 829 spdk_poller_unregister(&bvsession->stop_poller); 830 vhost_session_stop_done(vsession, 0); 831 832 spdk_vhost_unlock(); 833 return -1; 834 } 835 836 static int 837 vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, 838 struct spdk_vhost_session *vsession, void *unused) 839 { 840 struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession); 841 842 spdk_poller_unregister(&bvsession->requestq_poller); 843 bvsession->stop_poller = spdk_poller_register(destroy_session_poller_cb, 844 bvsession, 1000); 845 return 0; 846 } 847 848 static int 849 vhost_blk_stop(struct spdk_vhost_session *vsession) 850 { 851 return vhost_session_send_event(vsession->poll_group, vsession, 852 vhost_blk_stop_cb, 3, "stop session"); 853 } 854 855 static void 856 vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 857 { 858 struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); 859 struct spdk_vhost_blk_dev *bvdev; 860 861 bvdev = to_blk_dev(vdev); 862 assert(bvdev != NULL); 863 spdk_json_write_named_object_begin(w, "block"); 864 865 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 866 867 spdk_json_write_name(w, "bdev"); 868 if (bdev) { 869 spdk_json_write_string(w, spdk_bdev_get_name(bdev)); 870 } else { 871 spdk_json_write_null(w); 872 } 873 874 spdk_json_write_object_end(w); 875 } 876 877 static void 878 vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) 879 { 880 struct spdk_vhost_blk_dev *bvdev; 881 882 bvdev = to_blk_dev(vdev); 883 assert(bvdev != NULL); 884 if (!bvdev->bdev) { 885 return; 886 } 887 888 spdk_json_write_object_begin(w); 889 spdk_json_write_named_string(w, "method", "vhost_create_blk_controller"); 890 891 spdk_json_write_named_object_begin(w, "params"); 892 spdk_json_write_named_string(w, "ctrlr", vdev->name); 893 spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); 894 spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(&vdev->cpumask)); 895 spdk_json_write_named_bool(w, "readonly", bvdev->readonly); 896 spdk_json_write_object_end(w); 897 898 spdk_json_write_object_end(w); 899 } 900 901 static int vhost_blk_destroy(struct spdk_vhost_dev *dev); 902 903 static int 904 vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, 905 uint32_t len) 906 { 907 struct virtio_blk_config blkcfg; 908 struct spdk_vhost_blk_dev *bvdev; 909 struct spdk_bdev *bdev; 910 uint32_t blk_size; 911 uint64_t blkcnt; 912 913 bvdev = to_blk_dev(vdev); 914 assert(bvdev != NULL); 915 bdev = bvdev->bdev; 916 if (bdev == NULL) { 917 /* We can't just return -1 here as this GET_CONFIG message might 918 * be caused by a QEMU VM reboot. Returning -1 will indicate an 919 * error to QEMU, who might then decide to terminate itself. 920 * We don't want that. A simple reboot shouldn't break the system. 921 * 922 * Presenting a block device with block size 0 and block count 0 923 * doesn't cause any problems on QEMU side and the virtio-pci 924 * device is even still available inside the VM, but there will 925 * be no block device created for it - the kernel drivers will 926 * silently reject it. 927 */ 928 blk_size = 0; 929 blkcnt = 0; 930 } else { 931 blk_size = spdk_bdev_get_block_size(bdev); 932 blkcnt = spdk_bdev_get_num_blocks(bdev); 933 if (spdk_bdev_get_buf_align(bdev) > 1) { 934 blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE; 935 blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, BDEV_IO_NUM_CHILD_IOV - 2 - 1); 936 } else { 937 blkcfg.size_max = 131072; 938 /* -2 for REQ and RESP and -1 for region boundary splitting */ 939 blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; 940 } 941 } 942 943 memset(&blkcfg, 0, sizeof(blkcfg)); 944 blkcfg.blk_size = blk_size; 945 /* minimum I/O size in blocks */ 946 blkcfg.min_io_size = 1; 947 /* expressed in 512 Bytes sectors */ 948 blkcfg.capacity = (blkcnt * blk_size) / 512; 949 /* QEMU can overwrite this value when started */ 950 blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES; 951 952 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 953 /* 16MiB, expressed in 512 Bytes */ 954 blkcfg.max_discard_sectors = 32768; 955 blkcfg.max_discard_seg = 1; 956 blkcfg.discard_sector_alignment = blk_size / 512; 957 } 958 if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 959 blkcfg.max_write_zeroes_sectors = 32768; 960 blkcfg.max_write_zeroes_seg = 1; 961 } 962 963 memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg))); 964 965 return 0; 966 } 967 968 static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { 969 .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session), 970 .start_session = vhost_blk_start, 971 .stop_session = vhost_blk_stop, 972 .vhost_get_config = vhost_blk_get_config, 973 .dump_info_json = vhost_blk_dump_info_json, 974 .write_config_json = vhost_blk_write_config_json, 975 .remove_device = vhost_blk_destroy, 976 }; 977 978 int 979 vhost_blk_controller_construct(void) 980 { 981 struct spdk_conf_section *sp; 982 unsigned ctrlr_num; 983 char *bdev_name; 984 char *cpumask; 985 char *name; 986 bool readonly; 987 988 for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { 989 if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { 990 continue; 991 } 992 993 if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { 994 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 995 spdk_conf_section_get_name(sp)); 996 return -1; 997 } 998 999 name = spdk_conf_section_get_val(sp, "Name"); 1000 if (name == NULL) { 1001 SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); 1002 return -1; 1003 } 1004 1005 cpumask = spdk_conf_section_get_val(sp, "Cpumask"); 1006 readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); 1007 1008 bdev_name = spdk_conf_section_get_val(sp, "Dev"); 1009 if (bdev_name == NULL) { 1010 continue; 1011 } 1012 1013 if (spdk_vhost_blk_construct(name, cpumask, bdev_name, readonly) < 0) { 1014 return -1; 1015 } 1016 } 1017 1018 return 0; 1019 } 1020 1021 int 1022 spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, bool readonly) 1023 { 1024 struct spdk_vhost_blk_dev *bvdev = NULL; 1025 struct spdk_vhost_dev *vdev; 1026 struct spdk_bdev *bdev; 1027 int ret = 0; 1028 1029 spdk_vhost_lock(); 1030 bdev = spdk_bdev_get_by_name(dev_name); 1031 if (bdev == NULL) { 1032 SPDK_ERRLOG("%s: bdev '%s' not found\n", 1033 name, dev_name); 1034 ret = -ENODEV; 1035 goto out; 1036 } 1037 1038 bvdev = calloc(1, sizeof(*bvdev)); 1039 if (bvdev == NULL) { 1040 ret = -ENOMEM; 1041 goto out; 1042 } 1043 1044 vdev = &bvdev->vdev; 1045 vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE; 1046 vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES; 1047 vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES; 1048 1049 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1050 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD); 1051 } 1052 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { 1053 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 1054 } 1055 if (readonly) { 1056 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO); 1057 } 1058 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { 1059 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH); 1060 } 1061 1062 ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); 1063 if (ret != 0) { 1064 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n", 1065 name, dev_name, ret); 1066 goto out; 1067 } 1068 1069 bvdev->bdev = bdev; 1070 bvdev->readonly = readonly; 1071 ret = vhost_dev_register(vdev, name, cpumask, &vhost_blk_device_backend); 1072 if (ret != 0) { 1073 spdk_bdev_close(bvdev->bdev_desc); 1074 goto out; 1075 } 1076 1077 SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: using bdev '%s'\n", name, dev_name); 1078 out: 1079 if (ret != 0 && bvdev) { 1080 free(bvdev); 1081 } 1082 spdk_vhost_unlock(); 1083 return ret; 1084 } 1085 1086 static int 1087 vhost_blk_destroy(struct spdk_vhost_dev *vdev) 1088 { 1089 struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); 1090 int rc; 1091 1092 assert(bvdev != NULL); 1093 rc = vhost_dev_unregister(&bvdev->vdev); 1094 if (rc != 0) { 1095 return rc; 1096 } 1097 1098 if (bvdev->bdev_desc) { 1099 spdk_bdev_close(bvdev->bdev_desc); 1100 bvdev->bdev_desc = NULL; 1101 } 1102 bvdev->bdev = NULL; 1103 1104 free(bvdev); 1105 return 0; 1106 } 1107 1108 SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) 1109 SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) 1110