1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2019 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_uring.h" 7 8 #include "spdk/stdinc.h" 9 #include "spdk/config.h" 10 #include "spdk/barrier.h" 11 #include "spdk/bdev.h" 12 #include "spdk/env.h" 13 #include "spdk/fd.h" 14 #include "spdk/likely.h" 15 #include "spdk/thread.h" 16 #include "spdk/json.h" 17 #include "spdk/util.h" 18 #include "spdk/string.h" 19 20 #include "spdk/log.h" 21 #include "spdk_internal/uring.h" 22 23 #ifdef SPDK_CONFIG_URING_ZNS 24 #include <linux/blkzoned.h> 25 #define SECTOR_SHIFT 9 26 #endif 27 28 struct bdev_uring_zoned_dev { 29 uint64_t num_zones; 30 uint32_t zone_shift; 31 uint32_t lba_shift; 32 }; 33 34 struct bdev_uring_io_channel { 35 struct bdev_uring_group_channel *group_ch; 36 }; 37 38 struct bdev_uring_group_channel { 39 uint64_t io_inflight; 40 uint64_t io_pending; 41 struct spdk_poller *poller; 42 struct io_uring uring; 43 }; 44 45 struct bdev_uring_task { 46 uint64_t len; 47 struct bdev_uring_io_channel *ch; 48 TAILQ_ENTRY(bdev_uring_task) link; 49 }; 50 51 struct bdev_uring { 52 struct spdk_bdev bdev; 53 struct bdev_uring_zoned_dev zd; 54 char *filename; 55 int fd; 56 TAILQ_ENTRY(bdev_uring) link; 57 }; 58 59 static int bdev_uring_init(void); 60 static void bdev_uring_fini(void); 61 static void uring_free_bdev(struct bdev_uring *uring); 62 static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head); 63 64 #define SPDK_URING_QUEUE_DEPTH 512 65 #define MAX_EVENTS_PER_POLL 32 66 67 static int 68 bdev_uring_get_ctx_size(void) 69 { 70 return sizeof(struct bdev_uring_task); 71 } 72 73 static struct spdk_bdev_module uring_if = { 74 .name = "uring", 75 .module_init = bdev_uring_init, 76 .module_fini = bdev_uring_fini, 77 .get_ctx_size = bdev_uring_get_ctx_size, 78 }; 79 80 SPDK_BDEV_MODULE_REGISTER(uring, &uring_if) 81 82 static int 83 bdev_uring_open(struct bdev_uring *bdev) 84 { 85 int fd; 86 87 fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME); 88 if (fd < 0) { 89 /* Try without O_DIRECT for non-disk files */ 90 fd = open(bdev->filename, O_RDWR | O_NOATIME); 91 if (fd < 0) { 92 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 93 bdev->filename, errno, spdk_strerror(errno)); 94 bdev->fd = -1; 95 return -1; 96 } 97 } 98 99 bdev->fd = fd; 100 101 return 0; 102 } 103 104 static void 105 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 106 { 107 } 108 109 int 110 bdev_uring_rescan(const char *name) 111 { 112 struct spdk_bdev_desc *desc; 113 struct spdk_bdev *bdev; 114 struct bdev_uring *uring; 115 uint64_t uring_size, blockcnt; 116 int rc; 117 118 rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc); 119 if (rc != 0) { 120 return rc; 121 } 122 123 bdev = spdk_bdev_desc_get_bdev(desc); 124 if (bdev->module != &uring_if) { 125 rc = -ENODEV; 126 goto exit; 127 } 128 129 uring = SPDK_CONTAINEROF(bdev, struct bdev_uring, bdev); 130 uring_size = spdk_fd_get_size(uring->fd); 131 blockcnt = uring_size / bdev->blocklen; 132 133 if (bdev->blockcnt != blockcnt) { 134 SPDK_NOTICELOG("URING device is resized: bdev name %s, old block count %" PRIu64 135 ", new block count %" 136 PRIu64 "\n", 137 uring->filename, 138 bdev->blockcnt, 139 blockcnt); 140 rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt); 141 if (rc != 0) { 142 SPDK_ERRLOG("Could not change num blocks for uring bdev: name %s, errno: %d.\n", 143 uring->filename, rc); 144 goto exit; 145 } 146 } 147 148 exit: 149 spdk_bdev_close(desc); 150 return rc; 151 } 152 153 static int 154 bdev_uring_close(struct bdev_uring *bdev) 155 { 156 int rc; 157 158 if (bdev->fd == -1) { 159 return 0; 160 } 161 162 rc = close(bdev->fd); 163 if (rc < 0) { 164 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 165 bdev->fd, errno, spdk_strerror(errno)); 166 return -1; 167 } 168 169 bdev->fd = -1; 170 171 return 0; 172 } 173 174 static int64_t 175 bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch, 176 struct bdev_uring_task *uring_task, 177 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 178 { 179 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 180 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 181 struct io_uring_sqe *sqe; 182 183 sqe = io_uring_get_sqe(&group_ch->uring); 184 if (!sqe) { 185 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n"); 186 return -ENOMEM; 187 } 188 189 io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset); 190 io_uring_sqe_set_data(sqe, uring_task); 191 uring_task->len = nbytes; 192 uring_task->ch = uring_ch; 193 194 SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n", 195 iovcnt, nbytes, offset); 196 197 group_ch->io_pending++; 198 return nbytes; 199 } 200 201 static int64_t 202 bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch, 203 struct bdev_uring_task *uring_task, 204 struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset) 205 { 206 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 207 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 208 struct io_uring_sqe *sqe; 209 210 sqe = io_uring_get_sqe(&group_ch->uring); 211 if (!sqe) { 212 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n"); 213 return -ENOMEM; 214 } 215 216 io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset); 217 io_uring_sqe_set_data(sqe, uring_task); 218 uring_task->len = nbytes; 219 uring_task->ch = uring_ch; 220 221 SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n", 222 iovcnt, nbytes, offset); 223 224 group_ch->io_pending++; 225 return nbytes; 226 } 227 228 static int 229 bdev_uring_destruct(void *ctx) 230 { 231 struct bdev_uring *uring = ctx; 232 int rc = 0; 233 234 TAILQ_REMOVE(&g_uring_bdev_head, uring, link); 235 rc = bdev_uring_close(uring); 236 if (rc < 0) { 237 SPDK_ERRLOG("bdev_uring_close() failed\n"); 238 } 239 spdk_io_device_unregister(uring, NULL); 240 uring_free_bdev(uring); 241 return rc; 242 } 243 244 static int 245 bdev_uring_reap(struct io_uring *ring, int max) 246 { 247 int i, count, ret; 248 struct io_uring_cqe *cqe; 249 struct bdev_uring_task *uring_task; 250 enum spdk_bdev_io_status status; 251 252 count = 0; 253 for (i = 0; i < max; i++) { 254 ret = io_uring_peek_cqe(ring, &cqe); 255 if (ret != 0) { 256 return ret; 257 } 258 259 if (cqe == NULL) { 260 return count; 261 } 262 263 uring_task = (struct bdev_uring_task *)cqe->user_data; 264 if (cqe->res != (signed)uring_task->len) { 265 status = SPDK_BDEV_IO_STATUS_FAILED; 266 } else { 267 status = SPDK_BDEV_IO_STATUS_SUCCESS; 268 } 269 270 uring_task->ch->group_ch->io_inflight--; 271 io_uring_cqe_seen(ring, cqe); 272 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status); 273 count++; 274 } 275 276 return count; 277 } 278 279 static int 280 bdev_uring_group_poll(void *arg) 281 { 282 struct bdev_uring_group_channel *group_ch = arg; 283 int to_complete, to_submit; 284 int count, ret; 285 286 to_submit = group_ch->io_pending; 287 288 if (to_submit > 0) { 289 /* If there are I/O to submit, use io_uring_submit here. 290 * It will automatically call spdk_io_uring_enter appropriately. */ 291 ret = io_uring_submit(&group_ch->uring); 292 if (ret < 0) { 293 return SPDK_POLLER_BUSY; 294 } 295 296 group_ch->io_pending = 0; 297 group_ch->io_inflight += to_submit; 298 } 299 300 to_complete = group_ch->io_inflight; 301 count = 0; 302 if (to_complete > 0) { 303 count = bdev_uring_reap(&group_ch->uring, to_complete); 304 } 305 306 if (count + to_submit > 0) { 307 return SPDK_POLLER_BUSY; 308 } else { 309 return SPDK_POLLER_IDLE; 310 } 311 } 312 313 static void 314 bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 315 bool success) 316 { 317 int64_t ret = 0; 318 319 if (!success) { 320 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 321 return; 322 } 323 324 switch (bdev_io->type) { 325 case SPDK_BDEV_IO_TYPE_READ: 326 ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt, 327 ch, 328 (struct bdev_uring_task *)bdev_io->driver_ctx, 329 bdev_io->u.bdev.iovs, 330 bdev_io->u.bdev.iovcnt, 331 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 332 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 333 break; 334 case SPDK_BDEV_IO_TYPE_WRITE: 335 ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt, 336 ch, 337 (struct bdev_uring_task *)bdev_io->driver_ctx, 338 bdev_io->u.bdev.iovs, 339 bdev_io->u.bdev.iovcnt, 340 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 341 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 342 break; 343 default: 344 SPDK_ERRLOG("Wrong io type\n"); 345 break; 346 } 347 348 if (ret == -ENOMEM) { 349 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 350 } 351 } 352 353 #ifdef SPDK_CONFIG_URING_ZNS 354 static int 355 bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len) 356 { 357 char *path = NULL; 358 char *device = NULL; 359 char *name; 360 FILE *file; 361 int ret = 0; 362 363 name = strdup(devname); 364 if (name == NULL) { 365 return -EINVAL; 366 } 367 device = basename(name); 368 path = spdk_sprintf_alloc("/sys/block/%s/%s", device, attr); 369 free(name); 370 if (!path) { 371 return -EINVAL; 372 } 373 374 file = fopen(path, "r"); 375 if (!file) { 376 free(path); 377 return -ENOENT; 378 } 379 380 if (!fgets(str, str_len, file)) { 381 ret = -EINVAL; 382 goto close; 383 } 384 385 spdk_str_chomp(str); 386 387 close: 388 free(path); 389 fclose(file); 390 return ret; 391 } 392 393 static int 394 bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val) 395 { 396 char str[128]; 397 int ret; 398 399 ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str)); 400 if (ret) { 401 return ret; 402 } 403 404 *val = spdk_strtol(str, 10); 405 406 return 0; 407 } 408 409 static int 410 bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 411 { 412 switch (zones_rep->type) { 413 case BLK_ZONE_TYPE_CONVENTIONAL: 414 zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV; 415 break; 416 case BLK_ZONE_TYPE_SEQWRITE_REQ: 417 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR; 418 break; 419 case BLK_ZONE_TYPE_SEQWRITE_PREF: 420 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP; 421 break; 422 default: 423 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type); 424 return -EIO; 425 } 426 return 0; 427 } 428 429 static int 430 bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 431 { 432 switch (zones_rep->cond) { 433 case BLK_ZONE_COND_EMPTY: 434 zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY; 435 break; 436 case BLK_ZONE_COND_IMP_OPEN: 437 zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN; 438 break; 439 case BLK_ZONE_COND_EXP_OPEN: 440 zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN; 441 break; 442 case BLK_ZONE_COND_CLOSED: 443 zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED; 444 break; 445 case BLK_ZONE_COND_READONLY: 446 zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY; 447 break; 448 case BLK_ZONE_COND_FULL: 449 zone_info->state = SPDK_BDEV_ZONE_STATE_FULL; 450 break; 451 case BLK_ZONE_COND_OFFLINE: 452 zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE; 453 break; 454 case BLK_ZONE_COND_NOT_WP: 455 zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP; 456 break; 457 default: 458 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond); 459 return -EIO; 460 } 461 return 0; 462 } 463 464 static int 465 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 466 { 467 struct bdev_uring *uring; 468 struct blk_zone_range range; 469 long unsigned zone_mgmt_op; 470 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 471 472 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 473 474 switch (bdev_io->u.zone_mgmt.zone_action) { 475 case SPDK_BDEV_ZONE_RESET: 476 zone_mgmt_op = BLKRESETZONE; 477 break; 478 case SPDK_BDEV_ZONE_OPEN: 479 zone_mgmt_op = BLKOPENZONE; 480 break; 481 case SPDK_BDEV_ZONE_CLOSE: 482 zone_mgmt_op = BLKCLOSEZONE; 483 break; 484 case SPDK_BDEV_ZONE_FINISH: 485 zone_mgmt_op = BLKFINISHZONE; 486 break; 487 default: 488 return -EINVAL; 489 } 490 491 range.sector = (zone_id << uring->zd.lba_shift); 492 range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift); 493 494 if (ioctl(uring->fd, zone_mgmt_op, &range)) { 495 SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n", 496 bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno)); 497 return -EINVAL; 498 } 499 500 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 501 502 return 0; 503 } 504 505 static int 506 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 507 { 508 struct bdev_uring *uring; 509 struct blk_zone *zones; 510 struct blk_zone_report *rep; 511 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 512 size_t repsize; 513 uint32_t i, shift; 514 uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones; 515 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 516 517 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 518 shift = uring->zd.lba_shift; 519 520 if ((num_zones > uring->zd.num_zones) || !num_zones) { 521 return -EINVAL; 522 } 523 524 repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones); 525 rep = (struct blk_zone_report *)malloc(repsize); 526 if (!rep) { 527 return -ENOMEM; 528 } 529 530 zones = (struct blk_zone *)(rep + 1); 531 532 while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) { 533 memset(rep, 0, repsize); 534 rep->sector = zone_id; 535 rep->nr_zones = num_zones; 536 537 if (ioctl(uring->fd, BLKREPORTZONE, rep)) { 538 SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n", 539 errno, strerror(errno)); 540 free(rep); 541 return -EINVAL; 542 } 543 544 if (!rep->nr_zones) { 545 break; 546 } 547 548 for (i = 0; i < rep->nr_zones; i++) { 549 zone_info->zone_id = ((zones + i)->start >> shift); 550 zone_info->write_pointer = ((zones + i)->wp >> shift); 551 zone_info->capacity = ((zones + i)->capacity >> shift); 552 553 bdev_uring_fill_zone_state(zone_info, zones + i); 554 bdev_uring_fill_zone_type(zone_info, zones + i); 555 556 zone_id = ((zones + i)->start + (zones + i)->len) >> shift; 557 zone_info++; 558 num_zones--; 559 } 560 } 561 562 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 563 free(rep); 564 return 0; 565 } 566 567 static int 568 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 569 { 570 char str[128]; 571 long int val = 0; 572 uint32_t zinfo; 573 int retval = -1; 574 struct stat sb; 575 char resolved_path[PATH_MAX], *rp; 576 577 uring->bdev.zoned = false; 578 579 /* Follow symlink */ 580 if ((rp = realpath(filename, resolved_path))) { 581 filename = rp; 582 } 583 584 /* Perform check on block devices only */ 585 if (stat(filename, &sb) == 0 && S_ISBLK(sb.st_mode)) { 586 return 0; 587 } 588 589 /* Check if this is a zoned block device */ 590 if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) { 591 SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno); 592 } else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) { 593 /* Only host-aware & host-managed zns devices */ 594 uring->bdev.zoned = true; 595 596 if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) { 597 SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno)); 598 goto err_ret; 599 } 600 uring->zd.num_zones = zinfo; 601 602 if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) { 603 SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno)); 604 goto err_ret; 605 } 606 607 uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT; 608 uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift); 609 uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift); 610 611 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) { 612 SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno)); 613 goto err_ret; 614 } 615 uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val; 616 617 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) { 618 SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno)); 619 goto err_ret; 620 } 621 uring->bdev.max_active_zones = (uint32_t)val; 622 retval = 0; 623 } else { 624 retval = 0; /* queue/zoned=none */ 625 } 626 627 err_ret: 628 return retval; 629 } 630 #else 631 /* No support for zoned devices */ 632 static int 633 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 634 { 635 return -1; 636 } 637 638 static int 639 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 640 { 641 return -1; 642 } 643 644 static int 645 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 646 { 647 return 0; 648 } 649 #endif 650 651 static int 652 _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 653 { 654 655 switch (bdev_io->type) { 656 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 657 return bdev_uring_zone_get_info(bdev_io); 658 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 659 return bdev_uring_zone_management_op(bdev_io); 660 /* Read and write operations must be performed on buffers aligned to 661 * bdev->required_alignment. If user specified unaligned buffers, 662 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 663 case SPDK_BDEV_IO_TYPE_READ: 664 case SPDK_BDEV_IO_TYPE_WRITE: 665 spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb, 666 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 667 return 0; 668 default: 669 return -1; 670 } 671 } 672 673 static void 674 bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 675 { 676 if (_bdev_uring_submit_request(ch, bdev_io) < 0) { 677 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 678 } 679 } 680 681 static bool 682 bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 683 { 684 switch (io_type) { 685 #ifdef SPDK_CONFIG_URING_ZNS 686 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 687 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 688 #endif 689 case SPDK_BDEV_IO_TYPE_READ: 690 case SPDK_BDEV_IO_TYPE_WRITE: 691 return true; 692 default: 693 return false; 694 } 695 } 696 697 static int 698 bdev_uring_create_cb(void *io_device, void *ctx_buf) 699 { 700 struct bdev_uring_io_channel *ch = ctx_buf; 701 702 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if)); 703 704 return 0; 705 } 706 707 static void 708 bdev_uring_destroy_cb(void *io_device, void *ctx_buf) 709 { 710 struct bdev_uring_io_channel *ch = ctx_buf; 711 712 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 713 } 714 715 static struct spdk_io_channel * 716 bdev_uring_get_io_channel(void *ctx) 717 { 718 struct bdev_uring *uring = ctx; 719 720 return spdk_get_io_channel(uring); 721 } 722 723 static int 724 bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 725 { 726 struct bdev_uring *uring = ctx; 727 728 spdk_json_write_named_object_begin(w, "uring"); 729 730 spdk_json_write_named_string(w, "filename", uring->filename); 731 732 spdk_json_write_object_end(w); 733 734 return 0; 735 } 736 737 static void 738 bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 739 { 740 struct bdev_uring *uring = bdev->ctxt; 741 char uuid_str[SPDK_UUID_STRING_LEN]; 742 743 spdk_json_write_object_begin(w); 744 745 spdk_json_write_named_string(w, "method", "bdev_uring_create"); 746 747 spdk_json_write_named_object_begin(w, "params"); 748 spdk_json_write_named_string(w, "name", bdev->name); 749 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 750 spdk_json_write_named_string(w, "filename", uring->filename); 751 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 752 spdk_json_write_named_string(w, "uuid", uuid_str); 753 spdk_json_write_object_end(w); 754 755 spdk_json_write_object_end(w); 756 } 757 758 static const struct spdk_bdev_fn_table uring_fn_table = { 759 .destruct = bdev_uring_destruct, 760 .submit_request = bdev_uring_submit_request, 761 .io_type_supported = bdev_uring_io_type_supported, 762 .get_io_channel = bdev_uring_get_io_channel, 763 .dump_info_json = bdev_uring_dump_info_json, 764 .write_config_json = bdev_uring_write_json_config, 765 }; 766 767 static void 768 uring_free_bdev(struct bdev_uring *uring) 769 { 770 if (uring == NULL) { 771 return; 772 } 773 free(uring->filename); 774 free(uring->bdev.name); 775 free(uring); 776 } 777 778 static int 779 bdev_uring_group_create_cb(void *io_device, void *ctx_buf) 780 { 781 struct bdev_uring_group_channel *ch = ctx_buf; 782 783 /* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only 784 * local devices but also devices attached from remote target */ 785 if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) { 786 SPDK_ERRLOG("uring I/O context setup failure\n"); 787 return -1; 788 } 789 790 ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0); 791 return 0; 792 } 793 794 static void 795 bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf) 796 { 797 struct bdev_uring_group_channel *ch = ctx_buf; 798 799 io_uring_queue_exit(&ch->uring); 800 801 spdk_poller_unregister(&ch->poller); 802 } 803 804 struct spdk_bdev * 805 create_uring_bdev(const struct bdev_uring_opts *opts) 806 { 807 struct bdev_uring *uring; 808 uint32_t detected_block_size; 809 uint64_t bdev_size; 810 int rc; 811 uint32_t block_size = opts->block_size; 812 813 uring = calloc(1, sizeof(*uring)); 814 if (!uring) { 815 SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n"); 816 return NULL; 817 } 818 819 uring->filename = strdup(opts->filename); 820 if (!uring->filename) { 821 goto error_return; 822 } 823 824 if (bdev_uring_open(uring)) { 825 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", opts->filename, uring->fd, errno); 826 goto error_return; 827 } 828 829 bdev_size = spdk_fd_get_size(uring->fd); 830 831 uring->bdev.name = strdup(opts->name); 832 if (!uring->bdev.name) { 833 goto error_return; 834 } 835 uring->bdev.product_name = "URING bdev"; 836 uring->bdev.module = &uring_if; 837 838 uring->bdev.write_cache = 0; 839 840 detected_block_size = spdk_fd_get_blocklen(uring->fd); 841 if (block_size == 0) { 842 /* User did not specify block size - use autodetected block size. */ 843 if (detected_block_size == 0) { 844 SPDK_ERRLOG("Block size could not be auto-detected\n"); 845 goto error_return; 846 } 847 block_size = detected_block_size; 848 } else { 849 if (block_size < detected_block_size) { 850 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 851 "auto-detected block size %" PRIu32 "\n", 852 block_size, detected_block_size); 853 goto error_return; 854 } else if (detected_block_size != 0 && block_size != detected_block_size) { 855 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 856 "auto-detected block size %" PRIu32 "\n", 857 block_size, detected_block_size); 858 } 859 } 860 861 if (block_size < 512) { 862 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 863 goto error_return; 864 } 865 866 if (!spdk_u32_is_pow2(block_size)) { 867 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 868 goto error_return; 869 } 870 871 uring->bdev.blocklen = block_size; 872 uring->bdev.required_alignment = spdk_u32log2(block_size); 873 874 rc = bdev_uring_check_zoned_support(uring, opts->name, opts->filename); 875 if (rc) { 876 goto error_return; 877 } 878 879 if (bdev_size % uring->bdev.blocklen != 0) { 880 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 881 bdev_size, uring->bdev.blocklen); 882 goto error_return; 883 } 884 885 uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen; 886 uring->bdev.ctxt = uring; 887 888 uring->bdev.fn_table = &uring_fn_table; 889 890 if (!spdk_mem_all_zero(&opts->uuid, sizeof(opts->uuid))) { 891 spdk_uuid_copy(&uring->bdev.uuid, &opts->uuid); 892 } 893 894 spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb, 895 sizeof(struct bdev_uring_io_channel), 896 uring->bdev.name); 897 rc = spdk_bdev_register(&uring->bdev); 898 if (rc) { 899 spdk_io_device_unregister(uring, NULL); 900 goto error_return; 901 } 902 903 TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link); 904 return &uring->bdev; 905 906 error_return: 907 bdev_uring_close(uring); 908 uring_free_bdev(uring); 909 return NULL; 910 } 911 912 struct delete_uring_bdev_ctx { 913 spdk_delete_uring_complete cb_fn; 914 void *cb_arg; 915 }; 916 917 static void 918 uring_bdev_unregister_cb(void *arg, int bdeverrno) 919 { 920 struct delete_uring_bdev_ctx *ctx = arg; 921 922 ctx->cb_fn(ctx->cb_arg, bdeverrno); 923 free(ctx); 924 } 925 926 void 927 delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg) 928 { 929 struct delete_uring_bdev_ctx *ctx; 930 int rc; 931 932 ctx = calloc(1, sizeof(*ctx)); 933 if (ctx == NULL) { 934 cb_fn(cb_arg, -ENOMEM); 935 return; 936 } 937 938 ctx->cb_fn = cb_fn; 939 ctx->cb_arg = cb_arg; 940 rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx); 941 if (rc != 0) { 942 uring_bdev_unregister_cb(ctx, rc); 943 } 944 } 945 946 static int 947 bdev_uring_init(void) 948 { 949 spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb, 950 sizeof(struct bdev_uring_group_channel), "uring_module"); 951 952 return 0; 953 } 954 955 static void 956 bdev_uring_fini(void) 957 { 958 spdk_io_device_unregister(&uring_if, NULL); 959 } 960 961 SPDK_LOG_REGISTER_COMPONENT(uring) 962