1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2019 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_uring.h" 7 8 #include "spdk/stdinc.h" 9 #include "spdk/config.h" 10 #include "spdk/barrier.h" 11 #include "spdk/bdev.h" 12 #include "spdk/env.h" 13 #include "spdk/fd.h" 14 #include "spdk/likely.h" 15 #include "spdk/thread.h" 16 #include "spdk/json.h" 17 #include "spdk/util.h" 18 #include "spdk/string.h" 19 #include "spdk/file.h" 20 21 #include "spdk/log.h" 22 #include "spdk_internal/uring.h" 23 24 #ifdef SPDK_CONFIG_URING_ZNS 25 #include <linux/blkzoned.h> 26 #define SECTOR_SHIFT 9 27 #endif 28 29 struct bdev_uring_zoned_dev { 30 uint64_t num_zones; 31 uint32_t zone_shift; 32 uint32_t lba_shift; 33 }; 34 35 struct bdev_uring_io_channel { 36 struct bdev_uring_group_channel *group_ch; 37 }; 38 39 struct bdev_uring_group_channel { 40 uint64_t io_inflight; 41 uint64_t io_pending; 42 struct spdk_poller *poller; 43 struct io_uring uring; 44 }; 45 46 struct bdev_uring_task { 47 uint64_t len; 48 struct bdev_uring_io_channel *ch; 49 TAILQ_ENTRY(bdev_uring_task) link; 50 }; 51 52 struct bdev_uring { 53 struct spdk_bdev bdev; 54 struct bdev_uring_zoned_dev zd; 55 char *filename; 56 int fd; 57 TAILQ_ENTRY(bdev_uring) link; 58 }; 59 60 static int bdev_uring_init(void); 61 static void bdev_uring_fini(void); 62 static void uring_free_bdev(struct bdev_uring *uring); 63 static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head); 64 65 #define SPDK_URING_QUEUE_DEPTH 512 66 #define MAX_EVENTS_PER_POLL 32 67 68 static int 69 bdev_uring_get_ctx_size(void) 70 { 71 return sizeof(struct bdev_uring_task); 72 } 73 74 static struct spdk_bdev_module uring_if = { 75 .name = "uring", 76 .module_init = bdev_uring_init, 77 .module_fini = bdev_uring_fini, 78 .get_ctx_size = bdev_uring_get_ctx_size, 79 }; 80 81 SPDK_BDEV_MODULE_REGISTER(uring, &uring_if) 82 83 static int 84 bdev_uring_open(struct bdev_uring *bdev) 85 { 86 int fd; 87 88 fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME); 89 if (fd < 0) { 90 /* Try without O_DIRECT for non-disk files */ 91 fd = open(bdev->filename, O_RDWR | O_NOATIME); 92 if (fd < 0) { 93 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 94 bdev->filename, errno, spdk_strerror(errno)); 95 bdev->fd = -1; 96 return -1; 97 } 98 } 99 100 bdev->fd = fd; 101 102 return 0; 103 } 104 105 static void 106 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 107 { 108 } 109 110 int 111 bdev_uring_rescan(const char *name) 112 { 113 struct spdk_bdev_desc *desc; 114 struct spdk_bdev *bdev; 115 struct bdev_uring *uring; 116 uint64_t uring_size, blockcnt; 117 int rc; 118 119 rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc); 120 if (rc != 0) { 121 return rc; 122 } 123 124 bdev = spdk_bdev_desc_get_bdev(desc); 125 if (bdev->module != &uring_if) { 126 rc = -ENODEV; 127 goto exit; 128 } 129 130 uring = SPDK_CONTAINEROF(bdev, struct bdev_uring, bdev); 131 uring_size = spdk_fd_get_size(uring->fd); 132 blockcnt = uring_size / bdev->blocklen; 133 134 if (bdev->blockcnt != blockcnt) { 135 SPDK_NOTICELOG("URING device is resized: bdev name %s, old block count %" PRIu64 136 ", new block count %" 137 PRIu64 "\n", 138 uring->filename, 139 bdev->blockcnt, 140 blockcnt); 141 rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt); 142 if (rc != 0) { 143 SPDK_ERRLOG("Could not change num blocks for uring bdev: name %s, errno: %d.\n", 144 uring->filename, rc); 145 goto exit; 146 } 147 } 148 149 exit: 150 spdk_bdev_close(desc); 151 return rc; 152 } 153 154 static int 155 bdev_uring_close(struct bdev_uring *bdev) 156 { 157 int rc; 158 159 if (bdev->fd == -1) { 160 return 0; 161 } 162 163 rc = close(bdev->fd); 164 if (rc < 0) { 165 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 166 bdev->fd, errno, spdk_strerror(errno)); 167 return -1; 168 } 169 170 bdev->fd = -1; 171 172 return 0; 173 } 174 175 static int64_t 176 bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch, 177 struct bdev_uring_task *uring_task, 178 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 179 { 180 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 181 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 182 struct io_uring_sqe *sqe; 183 184 sqe = io_uring_get_sqe(&group_ch->uring); 185 if (!sqe) { 186 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n"); 187 return -ENOMEM; 188 } 189 190 io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset); 191 io_uring_sqe_set_data(sqe, uring_task); 192 uring_task->len = nbytes; 193 uring_task->ch = uring_ch; 194 195 SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n", 196 iovcnt, nbytes, offset); 197 198 group_ch->io_pending++; 199 return nbytes; 200 } 201 202 static int64_t 203 bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch, 204 struct bdev_uring_task *uring_task, 205 struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset) 206 { 207 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 208 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 209 struct io_uring_sqe *sqe; 210 211 sqe = io_uring_get_sqe(&group_ch->uring); 212 if (!sqe) { 213 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n"); 214 return -ENOMEM; 215 } 216 217 io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset); 218 io_uring_sqe_set_data(sqe, uring_task); 219 uring_task->len = nbytes; 220 uring_task->ch = uring_ch; 221 222 SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n", 223 iovcnt, nbytes, offset); 224 225 group_ch->io_pending++; 226 return nbytes; 227 } 228 229 static int 230 bdev_uring_destruct(void *ctx) 231 { 232 struct bdev_uring *uring = ctx; 233 int rc = 0; 234 235 TAILQ_REMOVE(&g_uring_bdev_head, uring, link); 236 rc = bdev_uring_close(uring); 237 if (rc < 0) { 238 SPDK_ERRLOG("bdev_uring_close() failed\n"); 239 } 240 spdk_io_device_unregister(uring, NULL); 241 uring_free_bdev(uring); 242 return rc; 243 } 244 245 static int 246 bdev_uring_reap(struct io_uring *ring, int max) 247 { 248 int i, count, ret; 249 struct io_uring_cqe *cqe; 250 struct bdev_uring_task *uring_task; 251 enum spdk_bdev_io_status status; 252 253 count = 0; 254 for (i = 0; i < max; i++) { 255 ret = io_uring_peek_cqe(ring, &cqe); 256 if (ret != 0) { 257 return ret; 258 } 259 260 if (cqe == NULL) { 261 return count; 262 } 263 264 uring_task = (struct bdev_uring_task *)cqe->user_data; 265 if (cqe->res != (signed)uring_task->len) { 266 status = SPDK_BDEV_IO_STATUS_FAILED; 267 } else { 268 status = SPDK_BDEV_IO_STATUS_SUCCESS; 269 } 270 271 uring_task->ch->group_ch->io_inflight--; 272 io_uring_cqe_seen(ring, cqe); 273 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status); 274 count++; 275 } 276 277 return count; 278 } 279 280 static int 281 bdev_uring_group_poll(void *arg) 282 { 283 struct bdev_uring_group_channel *group_ch = arg; 284 int to_complete, to_submit; 285 int count, ret; 286 287 to_submit = group_ch->io_pending; 288 289 if (to_submit > 0) { 290 /* If there are I/O to submit, use io_uring_submit here. 291 * It will automatically call spdk_io_uring_enter appropriately. */ 292 ret = io_uring_submit(&group_ch->uring); 293 if (ret < 0) { 294 return SPDK_POLLER_BUSY; 295 } 296 297 group_ch->io_pending = 0; 298 group_ch->io_inflight += to_submit; 299 } 300 301 to_complete = group_ch->io_inflight; 302 count = 0; 303 if (to_complete > 0) { 304 count = bdev_uring_reap(&group_ch->uring, to_complete); 305 } 306 307 if (count + to_submit > 0) { 308 return SPDK_POLLER_BUSY; 309 } else { 310 return SPDK_POLLER_IDLE; 311 } 312 } 313 314 static void 315 bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 316 bool success) 317 { 318 int64_t ret = 0; 319 320 if (!success) { 321 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 322 return; 323 } 324 325 switch (bdev_io->type) { 326 case SPDK_BDEV_IO_TYPE_READ: 327 ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt, 328 ch, 329 (struct bdev_uring_task *)bdev_io->driver_ctx, 330 bdev_io->u.bdev.iovs, 331 bdev_io->u.bdev.iovcnt, 332 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 333 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 334 break; 335 case SPDK_BDEV_IO_TYPE_WRITE: 336 ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt, 337 ch, 338 (struct bdev_uring_task *)bdev_io->driver_ctx, 339 bdev_io->u.bdev.iovs, 340 bdev_io->u.bdev.iovcnt, 341 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 342 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 343 break; 344 default: 345 SPDK_ERRLOG("Wrong io type\n"); 346 break; 347 } 348 349 if (ret == -ENOMEM) { 350 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 351 } 352 } 353 354 #ifdef SPDK_CONFIG_URING_ZNS 355 static int 356 bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 357 { 358 switch (zones_rep->type) { 359 case BLK_ZONE_TYPE_CONVENTIONAL: 360 zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV; 361 break; 362 case BLK_ZONE_TYPE_SEQWRITE_REQ: 363 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR; 364 break; 365 case BLK_ZONE_TYPE_SEQWRITE_PREF: 366 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP; 367 break; 368 default: 369 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type); 370 return -EIO; 371 } 372 return 0; 373 } 374 375 static int 376 bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 377 { 378 switch (zones_rep->cond) { 379 case BLK_ZONE_COND_EMPTY: 380 zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY; 381 break; 382 case BLK_ZONE_COND_IMP_OPEN: 383 zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN; 384 break; 385 case BLK_ZONE_COND_EXP_OPEN: 386 zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN; 387 break; 388 case BLK_ZONE_COND_CLOSED: 389 zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED; 390 break; 391 case BLK_ZONE_COND_READONLY: 392 zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY; 393 break; 394 case BLK_ZONE_COND_FULL: 395 zone_info->state = SPDK_BDEV_ZONE_STATE_FULL; 396 break; 397 case BLK_ZONE_COND_OFFLINE: 398 zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE; 399 break; 400 case BLK_ZONE_COND_NOT_WP: 401 zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP; 402 break; 403 default: 404 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond); 405 return -EIO; 406 } 407 return 0; 408 } 409 410 static int 411 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 412 { 413 struct bdev_uring *uring; 414 struct blk_zone_range range; 415 long unsigned zone_mgmt_op; 416 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 417 418 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 419 420 switch (bdev_io->u.zone_mgmt.zone_action) { 421 case SPDK_BDEV_ZONE_RESET: 422 zone_mgmt_op = BLKRESETZONE; 423 break; 424 case SPDK_BDEV_ZONE_OPEN: 425 zone_mgmt_op = BLKOPENZONE; 426 break; 427 case SPDK_BDEV_ZONE_CLOSE: 428 zone_mgmt_op = BLKCLOSEZONE; 429 break; 430 case SPDK_BDEV_ZONE_FINISH: 431 zone_mgmt_op = BLKFINISHZONE; 432 break; 433 default: 434 return -EINVAL; 435 } 436 437 range.sector = (zone_id << uring->zd.lba_shift); 438 range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift); 439 440 if (ioctl(uring->fd, zone_mgmt_op, &range)) { 441 SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n", 442 bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno)); 443 return -EINVAL; 444 } 445 446 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 447 448 return 0; 449 } 450 451 static int 452 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 453 { 454 struct bdev_uring *uring; 455 struct blk_zone *zones; 456 struct blk_zone_report *rep; 457 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 458 size_t repsize; 459 uint32_t i, shift; 460 uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones; 461 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 462 463 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 464 shift = uring->zd.lba_shift; 465 466 if ((num_zones > uring->zd.num_zones) || !num_zones) { 467 return -EINVAL; 468 } 469 470 repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones); 471 rep = (struct blk_zone_report *)malloc(repsize); 472 if (!rep) { 473 return -ENOMEM; 474 } 475 476 zones = (struct blk_zone *)(rep + 1); 477 478 while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) { 479 memset(rep, 0, repsize); 480 rep->sector = zone_id; 481 rep->nr_zones = num_zones; 482 483 if (ioctl(uring->fd, BLKREPORTZONE, rep)) { 484 SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n", 485 errno, strerror(errno)); 486 free(rep); 487 return -EINVAL; 488 } 489 490 if (!rep->nr_zones) { 491 break; 492 } 493 494 for (i = 0; i < rep->nr_zones; i++) { 495 zone_info->zone_id = ((zones + i)->start >> shift); 496 zone_info->write_pointer = ((zones + i)->wp >> shift); 497 zone_info->capacity = ((zones + i)->capacity >> shift); 498 499 bdev_uring_fill_zone_state(zone_info, zones + i); 500 bdev_uring_fill_zone_type(zone_info, zones + i); 501 502 zone_id = ((zones + i)->start + (zones + i)->len) >> shift; 503 zone_info++; 504 num_zones--; 505 } 506 } 507 508 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 509 free(rep); 510 return 0; 511 } 512 513 static int 514 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 515 { 516 char *filename_dup = NULL, *base; 517 char *str = NULL; 518 uint32_t val; 519 uint32_t zinfo; 520 int retval = -1; 521 struct stat sb; 522 char resolved_path[PATH_MAX], *rp; 523 char *sysfs_path = NULL; 524 525 uring->bdev.zoned = false; 526 527 /* Follow symlink */ 528 if ((rp = realpath(filename, resolved_path))) { 529 filename = rp; 530 } 531 532 /* Perform check on block devices only */ 533 if (stat(filename, &sb) == 0 && S_ISBLK(sb.st_mode)) { 534 return 0; 535 } 536 537 /* strdup() because basename() may modify the passed parameter */ 538 filename_dup = strdup(filename); 539 if (filename_dup == NULL) { 540 SPDK_ERRLOG("Could not duplicate string %s\n", filename); 541 return -1; 542 } 543 544 base = basename(filename_dup); 545 free(filename_dup); 546 sysfs_path = spdk_sprintf_alloc("/sys/block/%s/queue/zoned", base); 547 retval = spdk_read_sysfs_attribute(&str, "%s", sysfs_path); 548 /* Check if this is a zoned block device */ 549 if (retval < 0) { 550 SPDK_ERRLOG("Unable to open file %s. errno: %d\n", sysfs_path, retval); 551 } else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) { 552 /* Only host-aware & host-managed zns devices */ 553 uring->bdev.zoned = true; 554 555 if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) { 556 SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno)); 557 goto err_ret; 558 } 559 uring->zd.num_zones = zinfo; 560 561 if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) { 562 SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno)); 563 goto err_ret; 564 } 565 566 uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT; 567 uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift); 568 uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift); 569 570 retval = spdk_read_sysfs_attribute_uint32(&val, "/sys/block/%s/queue/max_open_zones", base); 571 if (retval < 0) { 572 SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", retval, strerror(-retval)); 573 goto err_ret; 574 } 575 uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = val; 576 577 retval = spdk_read_sysfs_attribute_uint32(&val, "/sys/block/%s/queue/max_active_zones", base); 578 if (retval < 0) { 579 SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", retval, strerror(-retval)); 580 goto err_ret; 581 } 582 uring->bdev.max_active_zones = val; 583 retval = 0; 584 } else { 585 retval = 0; /* queue/zoned=none */ 586 } 587 err_ret: 588 free(str); 589 free(sysfs_path); 590 return retval; 591 } 592 #else 593 /* No support for zoned devices */ 594 static int 595 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 596 { 597 return -1; 598 } 599 600 static int 601 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 602 { 603 return -1; 604 } 605 606 static int 607 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 608 { 609 return 0; 610 } 611 #endif 612 613 static int 614 _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 615 { 616 617 switch (bdev_io->type) { 618 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 619 return bdev_uring_zone_get_info(bdev_io); 620 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 621 return bdev_uring_zone_management_op(bdev_io); 622 /* Read and write operations must be performed on buffers aligned to 623 * bdev->required_alignment. If user specified unaligned buffers, 624 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 625 case SPDK_BDEV_IO_TYPE_READ: 626 case SPDK_BDEV_IO_TYPE_WRITE: 627 spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb, 628 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 629 return 0; 630 default: 631 return -1; 632 } 633 } 634 635 static void 636 bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 637 { 638 if (_bdev_uring_submit_request(ch, bdev_io) < 0) { 639 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 640 } 641 } 642 643 static bool 644 bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 645 { 646 switch (io_type) { 647 #ifdef SPDK_CONFIG_URING_ZNS 648 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 649 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 650 #endif 651 case SPDK_BDEV_IO_TYPE_READ: 652 case SPDK_BDEV_IO_TYPE_WRITE: 653 return true; 654 default: 655 return false; 656 } 657 } 658 659 static int 660 bdev_uring_create_cb(void *io_device, void *ctx_buf) 661 { 662 struct bdev_uring_io_channel *ch = ctx_buf; 663 664 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if)); 665 666 return 0; 667 } 668 669 static void 670 bdev_uring_destroy_cb(void *io_device, void *ctx_buf) 671 { 672 struct bdev_uring_io_channel *ch = ctx_buf; 673 674 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 675 } 676 677 static struct spdk_io_channel * 678 bdev_uring_get_io_channel(void *ctx) 679 { 680 struct bdev_uring *uring = ctx; 681 682 return spdk_get_io_channel(uring); 683 } 684 685 static int 686 bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 687 { 688 struct bdev_uring *uring = ctx; 689 690 spdk_json_write_named_object_begin(w, "uring"); 691 692 spdk_json_write_named_string(w, "filename", uring->filename); 693 694 spdk_json_write_object_end(w); 695 696 return 0; 697 } 698 699 static void 700 bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 701 { 702 struct bdev_uring *uring = bdev->ctxt; 703 char uuid_str[SPDK_UUID_STRING_LEN]; 704 705 spdk_json_write_object_begin(w); 706 707 spdk_json_write_named_string(w, "method", "bdev_uring_create"); 708 709 spdk_json_write_named_object_begin(w, "params"); 710 spdk_json_write_named_string(w, "name", bdev->name); 711 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 712 spdk_json_write_named_string(w, "filename", uring->filename); 713 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 714 spdk_json_write_named_string(w, "uuid", uuid_str); 715 spdk_json_write_object_end(w); 716 717 spdk_json_write_object_end(w); 718 } 719 720 static const struct spdk_bdev_fn_table uring_fn_table = { 721 .destruct = bdev_uring_destruct, 722 .submit_request = bdev_uring_submit_request, 723 .io_type_supported = bdev_uring_io_type_supported, 724 .get_io_channel = bdev_uring_get_io_channel, 725 .dump_info_json = bdev_uring_dump_info_json, 726 .write_config_json = bdev_uring_write_json_config, 727 }; 728 729 static void 730 uring_free_bdev(struct bdev_uring *uring) 731 { 732 if (uring == NULL) { 733 return; 734 } 735 free(uring->filename); 736 free(uring->bdev.name); 737 free(uring); 738 } 739 740 static int 741 bdev_uring_group_create_cb(void *io_device, void *ctx_buf) 742 { 743 struct bdev_uring_group_channel *ch = ctx_buf; 744 745 /* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only 746 * local devices but also devices attached from remote target */ 747 if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) { 748 SPDK_ERRLOG("uring I/O context setup failure\n"); 749 return -1; 750 } 751 752 ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0); 753 return 0; 754 } 755 756 static void 757 bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf) 758 { 759 struct bdev_uring_group_channel *ch = ctx_buf; 760 761 io_uring_queue_exit(&ch->uring); 762 763 spdk_poller_unregister(&ch->poller); 764 } 765 766 struct spdk_bdev * 767 create_uring_bdev(const struct bdev_uring_opts *opts) 768 { 769 struct bdev_uring *uring; 770 uint32_t detected_block_size; 771 uint64_t bdev_size; 772 int rc; 773 uint32_t block_size = opts->block_size; 774 775 uring = calloc(1, sizeof(*uring)); 776 if (!uring) { 777 SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n"); 778 return NULL; 779 } 780 781 uring->filename = strdup(opts->filename); 782 if (!uring->filename) { 783 goto error_return; 784 } 785 786 if (bdev_uring_open(uring)) { 787 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", opts->filename, uring->fd, errno); 788 goto error_return; 789 } 790 791 bdev_size = spdk_fd_get_size(uring->fd); 792 793 uring->bdev.name = strdup(opts->name); 794 if (!uring->bdev.name) { 795 goto error_return; 796 } 797 uring->bdev.product_name = "URING bdev"; 798 uring->bdev.module = &uring_if; 799 800 uring->bdev.write_cache = 0; 801 802 detected_block_size = spdk_fd_get_blocklen(uring->fd); 803 if (block_size == 0) { 804 /* User did not specify block size - use autodetected block size. */ 805 if (detected_block_size == 0) { 806 SPDK_ERRLOG("Block size could not be auto-detected\n"); 807 goto error_return; 808 } 809 block_size = detected_block_size; 810 } else { 811 if (block_size < detected_block_size) { 812 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 813 "auto-detected block size %" PRIu32 "\n", 814 block_size, detected_block_size); 815 goto error_return; 816 } else if (detected_block_size != 0 && block_size != detected_block_size) { 817 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 818 "auto-detected block size %" PRIu32 "\n", 819 block_size, detected_block_size); 820 } 821 } 822 823 if (block_size < 512) { 824 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 825 goto error_return; 826 } 827 828 if (!spdk_u32_is_pow2(block_size)) { 829 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 830 goto error_return; 831 } 832 833 uring->bdev.blocklen = block_size; 834 uring->bdev.required_alignment = spdk_u32log2(block_size); 835 836 rc = bdev_uring_check_zoned_support(uring, opts->name, opts->filename); 837 if (rc) { 838 goto error_return; 839 } 840 841 if (bdev_size % uring->bdev.blocklen != 0) { 842 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 843 bdev_size, uring->bdev.blocklen); 844 goto error_return; 845 } 846 847 uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen; 848 uring->bdev.ctxt = uring; 849 850 uring->bdev.fn_table = &uring_fn_table; 851 852 if (!spdk_mem_all_zero(&opts->uuid, sizeof(opts->uuid))) { 853 spdk_uuid_copy(&uring->bdev.uuid, &opts->uuid); 854 } 855 856 spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb, 857 sizeof(struct bdev_uring_io_channel), 858 uring->bdev.name); 859 rc = spdk_bdev_register(&uring->bdev); 860 if (rc) { 861 spdk_io_device_unregister(uring, NULL); 862 goto error_return; 863 } 864 865 TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link); 866 return &uring->bdev; 867 868 error_return: 869 bdev_uring_close(uring); 870 uring_free_bdev(uring); 871 return NULL; 872 } 873 874 struct delete_uring_bdev_ctx { 875 spdk_delete_uring_complete cb_fn; 876 void *cb_arg; 877 }; 878 879 static void 880 uring_bdev_unregister_cb(void *arg, int bdeverrno) 881 { 882 struct delete_uring_bdev_ctx *ctx = arg; 883 884 ctx->cb_fn(ctx->cb_arg, bdeverrno); 885 free(ctx); 886 } 887 888 void 889 delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg) 890 { 891 struct delete_uring_bdev_ctx *ctx; 892 int rc; 893 894 ctx = calloc(1, sizeof(*ctx)); 895 if (ctx == NULL) { 896 cb_fn(cb_arg, -ENOMEM); 897 return; 898 } 899 900 ctx->cb_fn = cb_fn; 901 ctx->cb_arg = cb_arg; 902 rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx); 903 if (rc != 0) { 904 uring_bdev_unregister_cb(ctx, rc); 905 } 906 } 907 908 static int 909 bdev_uring_init(void) 910 { 911 spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb, 912 sizeof(struct bdev_uring_group_channel), "uring_module"); 913 914 return 0; 915 } 916 917 static void 918 bdev_uring_fini(void) 919 { 920 spdk_io_device_unregister(&uring_if, NULL); 921 } 922 923 SPDK_LOG_REGISTER_COMPONENT(uring) 924