/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2019 Intel Corporation.
 * All rights reserved.
 */

/*
 * bdev_uring: SPDK block device module that services reads and writes on a
 * regular file or block device through a per-thread Linux io_uring.  When
 * built with SPDK_CONFIG_URING_ZNS it also exposes zoned-device info and
 * zone management operations via the blkzoned ioctl interface.
 */

#include "bdev_uring.h"

#include "spdk/stdinc.h"
#include "spdk/config.h"
#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"
#include "spdk_internal/uring.h"

#ifdef SPDK_CONFIG_URING_ZNS
#include <linux/blkzoned.h>
/* blkzoned ioctls express positions in 512-byte sectors. */
#define SECTOR_SHIFT 9
#endif

/* Zoned-device geometry cached when the bdev is created. */
struct bdev_uring_zoned_dev {
	uint64_t num_zones;
	/* log2 of the zone size in blocks (see bdev_uring_check_zoned_support). */
	uint32_t zone_shift;
	/* log2(blocklen) - SECTOR_SHIFT: converts blocks <-> 512B sectors. */
	uint32_t lba_shift;
};

/* Per-bdev, per-thread channel.  All submission state is shared at the
 * group-channel level, so this only points at the group channel. */
struct bdev_uring_io_channel {
	struct bdev_uring_group_channel *group_ch;
};

/* Per-thread group channel: one io_uring instance shared by every uring
 * bdev on the thread, drained by a single poller. */
struct bdev_uring_group_channel {
	uint64_t io_inflight;	/* SQEs submitted to the kernel, CQE not yet reaped */
	uint64_t io_pending;	/* SQEs prepared but not yet submitted */
	struct spdk_poller *poller;
	struct io_uring uring;
};

/* Per-I/O context; lives in spdk_bdev_io->driver_ctx. */
struct bdev_uring_task {
	uint64_t len;	/* expected transfer size, compared against cqe->res */
	struct bdev_uring_io_channel *ch;
	TAILQ_ENTRY(bdev_uring_task) link;
};

/* One uring bdev, backed by a single open file descriptor. */
struct bdev_uring {
	struct spdk_bdev bdev;
	struct bdev_uring_zoned_dev zd;
	char *filename;
	int fd;
	TAILQ_ENTRY(bdev_uring) link;
};

static int bdev_uring_init(void);
static void bdev_uring_fini(void);
static void uring_free_bdev(struct bdev_uring *uring);
static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head);

#define SPDK_URING_QUEUE_DEPTH 512
/* NOTE(review): MAX_EVENTS_PER_POLL is not referenced anywhere in this
 * file; the reap path is bounded by io_inflight instead. */
#define MAX_EVENTS_PER_POLL 32

/* Tell the bdev layer how much driver_ctx to reserve per I/O. */
static int
bdev_uring_get_ctx_size(void)
{
	return sizeof(struct bdev_uring_task);
}

static struct spdk_bdev_module uring_if = {
	.name = "uring",
	.module_init = bdev_uring_init,
	.module_fini = bdev_uring_fini,
	.get_ctx_size = bdev_uring_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)

/* Open the backing file, preferring O_DIRECT but falling back to buffered
 * I/O (e.g. for filesystems/files that reject O_DIRECT).  Returns 0 on
 * success, -1 on failure with bdev->fd set to -1. */
static int
bdev_uring_open(struct bdev_uring *bdev)
{
	int fd;

	fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(bdev->filename, O_RDWR | O_NOATIME);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    bdev->filename, errno, spdk_strerror(errno));
			bdev->fd = -1;
			return -1;
		}
	}

	bdev->fd = fd;

	return 0;
}

/* Close the backing descriptor if open.  Idempotent: a second call after
 * success (or a never-opened bdev) returns 0 immediately. */
static int
bdev_uring_close(struct bdev_uring *bdev)
{
	int rc;

	if (bdev->fd == -1) {
		return 0;
	}

	rc = close(bdev->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    bdev->fd, errno, spdk_strerror(errno));
		return -1;
	}

	bdev->fd = -1;

	return 0;
}

/* Queue a vectored read on the group channel's ring.  The SQE is only
 * prepared here; actual submission happens in bdev_uring_group_poll.
 * Returns nbytes on success or -ENOMEM when the SQ ring is full (the
 * caller retries via the bdev layer's NOMEM handling). */
static int64_t
bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
		 struct bdev_uring_task *uring_task,
		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	if (!sqe) {
		SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
		return -ENOMEM;
	}

	io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/* Queue a vectored write; mirror image of bdev_uring_readv. */
static int64_t
bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
		  struct bdev_uring_task *uring_task,
		  struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	if (!sqe) {
		SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
		return -ENOMEM;
	}

	io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/* fn_table destruct: unlink the bdev, close the fd, and free everything.
 * NOTE(review): uring_free_bdev() runs immediately after
 * spdk_io_device_unregister(), which completes asynchronously — confirm no
 * channel teardown can still touch *uring at that point. */
static int
bdev_uring_destruct(void *ctx)
{
	struct bdev_uring *uring = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
	rc = bdev_uring_close(uring);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_uring_close() failed\n");
	}
	spdk_io_device_unregister(uring, NULL);
	uring_free_bdev(uring);
	return rc;
}

/* Drain up to 'max' completions from the ring, completing each bdev_io.
 * A completion whose result is not exactly the expected byte count
 * (short transfer or negative errno result) is reported as FAILED.
 * Returns the number of CQEs consumed, or the io_uring_peek_cqe error. */
static int
bdev_uring_reap(struct io_uring *ring, int max)
{
	int i, count, ret;
	struct io_uring_cqe *cqe;
	struct bdev_uring_task *uring_task;
	enum spdk_bdev_io_status status;

	count = 0;
	for (i = 0; i < max; i++) {
		ret = io_uring_peek_cqe(ring, &cqe);
		if (ret != 0) {
			return ret;
		}

		if (cqe == NULL) {
			return count;
		}

		/* user_data carries the task pointer set in readv/writev. */
		uring_task = (struct bdev_uring_task *)cqe->user_data;
		if (cqe->res != (signed)uring_task->len) {
			status = SPDK_BDEV_IO_STATUS_FAILED;
		} else {
			status = SPDK_BDEV_IO_STATUS_SUCCESS;
		}

		uring_task->ch->group_ch->io_inflight--;
		io_uring_cqe_seen(ring, cqe);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
		count++;
	}

	return count;
}

/* Group-channel poller: flush pending SQEs to the kernel, then reap any
 * completions.  Returns BUSY if anything was submitted or completed (or a
 * submit error occurred, so we retry soon), IDLE otherwise. */
static int
bdev_uring_group_poll(void *arg)
{
	struct bdev_uring_group_channel *group_ch = arg;
	int to_complete, to_submit;
	int count, ret;

	to_submit = group_ch->io_pending;

	if (to_submit > 0) {
		/* If there are I/O to submit, use io_uring_submit here.
		 * It will automatically call spdk_io_uring_enter appropriately. */
		ret = io_uring_submit(&group_ch->uring);
		if (ret < 0) {
			return SPDK_POLLER_BUSY;
		}

		group_ch->io_pending = 0;
		group_ch->io_inflight += to_submit;
	}

	to_complete = group_ch->io_inflight;
	count = 0;
	if (to_complete > 0) {
		count = bdev_uring_reap(&group_ch->uring, to_complete);
	}

	if (count + to_submit > 0) {
		return SPDK_POLLER_BUSY;
	} else {
		return SPDK_POLLER_IDLE;
	}
}

/* spdk_bdev_io_get_buf callback: now that an aligned buffer is available,
 * dispatch the read or write.  -ENOMEM (SQ ring full) is surfaced as
 * NOMEM so the bdev layer requeues the I/O. */
static void
bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		      bool success)
{
	int64_t ret = 0;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
				       ch,
				       (struct bdev_uring_task *)bdev_io->driver_ctx,
				       bdev_io->u.bdev.iovs,
				       bdev_io->u.bdev.iovcnt,
				       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
					ch,
					(struct bdev_uring_task *)bdev_io->driver_ctx,
					bdev_io->u.bdev.iovs,
					bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}

	if (ret == -ENOMEM) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
	}
}

#ifdef SPDK_CONFIG_URING_ZNS
/* Read a single sysfs attribute of the device into 'str' (trailing newline
 * stripped).  Returns 0 on success, -EINVAL/-ENOENT on failure.
 * NOTE(review): basename() may modify its argument on some libcs — this
 * passes a const string; confirm the GNU (non-modifying) variant is in
 * effect here. */
static int
bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len)
{
	char *path = NULL;
	char *device = NULL;
	FILE *file;
	int ret = 0;

	device = basename(devname);
	path = spdk_sprintf_alloc("/sys/block/%s/%s", device, attr);
	if (!path) {
		return -EINVAL;
	}

	file = fopen(path, "r");
	if (!file) {
		free(path);
		return -ENOENT;
	}

	if (!fgets(str, str_len, file)) {
		ret = -EINVAL;
		goto close;
	}

	spdk_str_chomp(str);

close:
	free(path);
	fclose(file);
	return ret;
}

/* Read a sysfs attribute and parse it as a base-10 long.
 * NOTE(review): the spdk_strtol() result is not checked for a parse
 * error before being stored into *val. */
static int
bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val)
{
	char str[128];
	int ret;

	ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str));
	if (ret) {
		return ret;
	}

	*val = spdk_strtol(str, 10);

	return 0;
}

/* Translate a kernel blk_zone type into the SPDK zone type enum. */
static int
bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
{
	switch (zones_rep->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV;
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
		break;
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP;
		break;
	default:
		SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type);
		return -EIO;
	}
	return 0;
}

/* Translate a kernel blk_zone condition into the SPDK zone state enum. */
static int
bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
{
	switch (zones_rep->cond) {
	case BLK_ZONE_COND_EMPTY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
		break;
	case BLK_ZONE_COND_CLOSED:
		zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
		break;
	case BLK_ZONE_COND_READONLY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
		break;
	case BLK_ZONE_COND_FULL:
		zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
		break;
	case BLK_ZONE_COND_OFFLINE:
		zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
		break;
	case BLK_ZONE_COND_NOT_WP:
		zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP;
		break;
	default:
		SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
		return -EIO;
	}
	return 0;
}

/* Execute a synchronous zone management ioctl (reset/open/close/finish)
 * for the zone named in the bdev_io.  Zone id and size are converted from
 * blocks to 512B sectors via lba_shift before the ioctl.  Completes the
 * bdev_io on success; returns -EINVAL on unsupported action or ioctl
 * failure (caller fails the I/O). */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone_range range;
	long unsigned zone_mgmt_op;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		zone_mgmt_op = BLKRESETZONE;
		break;
	case SPDK_BDEV_ZONE_OPEN:
		zone_mgmt_op = BLKOPENZONE;
		break;
	case SPDK_BDEV_ZONE_CLOSE:
		zone_mgmt_op = BLKCLOSEZONE;
		break;
	case SPDK_BDEV_ZONE_FINISH:
		zone_mgmt_op = BLKFINISHZONE;
		break;
	default:
		return -EINVAL;
	}

	range.sector = (zone_id << uring->zd.lba_shift);
	range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);

	if (ioctl(uring->fd, zone_mgmt_op, &range)) {
		SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
			    bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
		return -EINVAL;
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

/* Service a GET_ZONE_INFO request with BLKREPORTZONE, translating each
 * reported zone (sector units) into block units for the caller's
 * spdk_bdev_zone_info array.  Completes the bdev_io on success.
 * NOTE(review): the return values of bdev_uring_fill_zone_state/type are
 * ignored here, so a zone with an unknown state/type is still reported. */
static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone *zones;
	struct blk_zone_report *rep;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	size_t repsize;
	uint32_t i, shift;
	uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
	shift = uring->zd.lba_shift;

	if ((num_zones > uring->zd.num_zones) || !num_zones) {
		return -EINVAL;
	}

	/* Report header followed by a flexible array of blk_zone entries. */
	repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
	rep = (struct blk_zone_report *)malloc(repsize);
	if (!rep) {
		return -ENOMEM;
	}

	zones = (struct blk_zone *)(rep + 1);

	while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
		memset(rep, 0, repsize);
		rep->sector = zone_id;
		rep->nr_zones = num_zones;

		if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
			SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
				    errno, strerror(errno));
			free(rep);
			return -EINVAL;
		}

		if (!rep->nr_zones) {
			break;
		}

		for (i = 0; i < rep->nr_zones; i++) {
			/* Kernel reports in 512B sectors; convert to blocks. */
			zone_info->zone_id = ((zones + i)->start >> shift);
			zone_info->write_pointer = ((zones + i)->wp >> shift);
			zone_info->capacity = ((zones + i)->capacity >> shift);

			bdev_uring_fill_zone_state(zone_info, zones + i);
			bdev_uring_fill_zone_type(zone_info, zones + i);

			/* Advance to the zone after the one just reported. */
			zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
			zone_info++;
			num_zones--;
		}
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	free(rep);
	return 0;
}

/* Probe sysfs/ioctls to decide whether the backing device is zoned and,
 * if so, fill in uring->zd and the zoned fields of uring->bdev.
 * Returns 0 for non-zoned devices or a fully-probed zoned device, -1 if
 * any probe of a zoned device fails.  Must run after required_alignment
 * is set, since lba_shift derives from it. */
static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	char str[128];
	long int val = 0;
	uint32_t zinfo;
	int retval = -1;

	uring->bdev.zoned = false;

	/* Check if this is a zoned block device */
	if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) {
		SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno);
	} else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
		/* Only host-aware & host-managed zns devices */
		uring->bdev.zoned = true;

		if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->zd.num_zones = zinfo;

		if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}

		/* BLKGETZONESZ reports the zone size in 512B sectors;
		 * lba_shift rescales it to this bdev's block size. */
		uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
		uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
		uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) {
			SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val;

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) {
			SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_active_zones = (uint32_t)val;
		retval = 0;
	} else {
		retval = 0; /* queue/zoned=none */
	}

err_ret:
	return retval;
}
#else
/* No support for zoned devices */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	return 0;
}
#endif

/* Dispatch one bdev_io.  Returns 0 if the I/O was accepted (it will be
 * completed asynchronously), negative on immediate failure. */
static int
_bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		return bdev_uring_zone_get_info(bdev_io);
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		return bdev_uring_zone_management_op(bdev_io);
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	default:
		return -1;
	}
}

/* fn_table submit_request: fail the I/O immediately if dispatch refused it. */
static void
bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

/* fn_table io_type_supported: reads/writes always; zone ops only when
 * built with ZNS support. */
static bool
bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
#ifdef SPDK_CONFIG_URING_ZNS
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
#endif
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

/* Per-bdev channel create: take a reference on this thread's group channel. */
static int
bdev_uring_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));

	return 0;
}

/* Per-bdev channel destroy: release the group channel reference. */
static void
bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

/* fn_table get_io_channel. */
static struct spdk_io_channel *
bdev_uring_get_io_channel(void *ctx)
{
	struct bdev_uring *uring = ctx;

	return spdk_get_io_channel(uring);
}

/* fn_table dump_info_json: emit the driver-specific info object. */
static int
bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = ctx;

	spdk_json_write_named_object_begin(w, "uring");

	spdk_json_write_named_string(w, "filename", uring->filename);

	spdk_json_write_object_end(w);

	return 0;
}

/* fn_table write_config_json: emit the RPC call that would recreate this bdev. */
static void
bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_uring_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_string(w, "filename", uring->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table uring_fn_table = {
	.destruct		= bdev_uring_destruct,
	.submit_request		= bdev_uring_submit_request,
	.io_type_supported	= bdev_uring_io_type_supported,
	.get_io_channel		= bdev_uring_get_io_channel,
	.dump_info_json		= bdev_uring_dump_info_json,
	.write_config_json	= bdev_uring_write_json_config,
};

/* Free a bdev_uring and its owned strings.  NULL-safe. */
static void
uring_free_bdev(struct bdev_uring *uring)
{
	if (uring == NULL) {
		return;
	}
	free(uring->filename);
	free(uring->bdev.name);
	free(uring);
}

/* Group channel create: set up this thread's io_uring and start the poller. */
static int
bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	/* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only
	 * local devices but also devices attached from remote target */
	if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) {
		SPDK_ERRLOG("uring I/O context setup failure\n");
		return -1;
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0);
	return 0;
}

/* Group channel destroy: tear down the ring and stop the poller. */
static void
bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	io_uring_queue_exit(&ch->uring);

	spdk_poller_unregister(&ch->poller);
}

/* Create and register a uring bdev over 'filename'.
 *
 * name        - bdev name (copied).
 * filename    - backing file or block device path (copied).
 * block_size  - logical block size; 0 means auto-detect from the fd.
 *               Must be >= 512, a power of two, and not smaller than the
 *               detected device block size.
 *
 * Returns the registered bdev, or NULL on any failure (all partially
 * acquired resources are released via error_return). */
struct spdk_bdev *
create_uring_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct bdev_uring *uring;
	uint32_t detected_block_size;
	uint64_t bdev_size;
	int rc;

	uring = calloc(1, sizeof(*uring));
	if (!uring) {
		SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
		return NULL;
	}

	uring->filename = strdup(filename);
	if (!uring->filename) {
		goto error_return;
	}

	if (bdev_uring_open(uring)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno);
		goto error_return;
	}

	bdev_size = spdk_fd_get_size(uring->fd);

	uring->bdev.name = strdup(name);
	if (!uring->bdev.name) {
		goto error_return;
	}
	uring->bdev.product_name = "URING bdev";
	uring->bdev.module = &uring_if;

	uring->bdev.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(uring->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			goto error_return;
		}
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	uring->bdev.blocklen = block_size;
	uring->bdev.required_alignment = spdk_u32log2(block_size);

	/* Must follow the required_alignment assignment above: the zoned
	 * probe derives lba_shift from it. */
	rc = bdev_uring_check_zoned_support(uring, name, filename);
	if (rc) {
		goto error_return;
	}

	if (bdev_size % uring->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, uring->bdev.blocklen);
		goto error_return;
	}

	uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
	uring->bdev.ctxt = uring;

	uring->bdev.fn_table = &uring_fn_table;

	spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
				sizeof(struct bdev_uring_io_channel),
				uring->bdev.name);
	rc = spdk_bdev_register(&uring->bdev);
	if (rc) {
		spdk_io_device_unregister(uring, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
	return &uring->bdev;

error_return:
	bdev_uring_close(uring);
	uring_free_bdev(uring);
	return NULL;
}

struct delete_uring_bdev_ctx { 838 spdk_delete_uring_complete cb_fn; 839 void *cb_arg; 840 }; 841 842 static void 843 uring_bdev_unregister_cb(void *arg, int bdeverrno) 844 { 845 struct delete_uring_bdev_ctx *ctx = arg; 846 847 ctx->cb_fn(ctx->cb_arg, bdeverrno); 848 free(ctx); 849 } 850 851 void 852 delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg) 853 { 854 struct delete_uring_bdev_ctx *ctx; 855 int rc; 856 857 ctx = calloc(1, sizeof(*ctx)); 858 if (ctx == NULL) { 859 cb_fn(cb_arg, -ENOMEM); 860 return; 861 } 862 863 ctx->cb_fn = cb_fn; 864 ctx->cb_arg = cb_arg; 865 rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx); 866 if (rc != 0) { 867 uring_bdev_unregister_cb(ctx, rc); 868 } 869 } 870 871 static int 872 bdev_uring_init(void) 873 { 874 spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb, 875 sizeof(struct bdev_uring_group_channel), "uring_module"); 876 877 return 0; 878 } 879 880 static void 881 bdev_uring_fini(void) 882 { 883 spdk_io_device_unregister(&uring_if, NULL); 884 } 885 886 SPDK_LOG_REGISTER_COMPONENT(uring) 887