/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

/*
 * bdev_uring: SPDK bdev module that services READ/WRITE I/O for a local file
 * or block device through Linux io_uring. One io_uring instance is shared per
 * poll group (per-thread group channel); individual bdev channels only point
 * at their group channel. Optional zoned-block-device (ZNS) support is
 * compiled in under SPDK_CONFIG_URING_ZNS and implemented via blkzoned ioctls.
 */

#include "bdev_uring.h"

#include "spdk/stdinc.h"
#include "spdk/config.h"
#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"
#include "spdk_internal/uring.h"

#ifdef SPDK_CONFIG_URING_ZNS
#include <linux/blkzoned.h>
/* Kernel block layer expresses zone ranges in 512-byte sectors. */
#define SECTOR_SHIFT 9
#endif

/* Cached zoned-device geometry, filled in by bdev_uring_check_zoned_support(). */
struct bdev_uring_zoned_dev {
	uint64_t		num_zones;	/* total zones reported by BLKGETNRZONES */
	uint32_t		zone_shift;	/* log2 of zone size in logical blocks */
	uint32_t		lba_shift;	/* log2(blocklen) - SECTOR_SHIFT: converts LBAs to 512B sectors */
};

/* Per-bdev, per-thread channel; all real I/O state lives in the group channel. */
struct bdev_uring_io_channel {
	struct bdev_uring_group_channel		*group_ch;
};

/* Per-thread group channel owning the io_uring instance and its poller. */
struct bdev_uring_group_channel {
	uint64_t				io_inflight;	/* SQEs submitted, CQEs not yet reaped */
	uint64_t				io_pending;	/* SQEs prepared, not yet submitted */
	struct spdk_poller			*poller;
	struct io_uring				uring;
};

/* Per-I/O driver context (lives in spdk_bdev_io->driver_ctx). */
struct bdev_uring_task {
	uint64_t			len;	/* expected byte count; compared against cqe->res */
	struct bdev_uring_io_channel	*ch;
	TAILQ_ENTRY(bdev_uring_task)	link;
};

struct bdev_uring {
	struct spdk_bdev	bdev;
	struct bdev_uring_zoned_dev	zd;
	char			*filename;
	int			fd;
	TAILQ_ENTRY(bdev_uring)  link;
};

static int bdev_uring_init(void);
static void bdev_uring_fini(void);
static void uring_free_bdev(struct bdev_uring *uring);
static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head);

#define SPDK_URING_QUEUE_DEPTH 512
#define MAX_EVENTS_PER_POLL 32

/* Tell the bdev layer how much driver_ctx to reserve in each spdk_bdev_io. */
static int
bdev_uring_get_ctx_size(void)
{
	return sizeof(struct bdev_uring_task);
}

static struct spdk_bdev_module uring_if = {
	.name		= "uring",
	.module_init	= bdev_uring_init,
	.module_fini	= bdev_uring_fini,
	.get_ctx_size	= bdev_uring_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)

/*
 * Open the backing file. O_DIRECT is tried first and silently dropped on
 * failure (e.g. filesystems/files that reject it). Returns 0 on success,
 * -1 on failure with bdev->fd set to -1.
 */
static int
bdev_uring_open(struct bdev_uring *bdev)
{
	int fd;

	fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(bdev->filename, O_RDWR | O_NOATIME);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    bdev->filename, errno, spdk_strerror(errno));
			bdev->fd = -1;
			return -1;
		}
	}

	bdev->fd = fd;

	return 0;
}

/* Close the backing fd if open; idempotent (fd == -1 is a no-op success). */
static int
bdev_uring_close(struct bdev_uring *bdev)
{
	int rc;

	if (bdev->fd == -1) {
		return 0;
	}

	rc = close(bdev->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    bdev->fd, errno, spdk_strerror(errno));
		return -1;
	}

	bdev->fd = -1;

	return 0;
}

/*
 * Queue a readv SQE. The SQE is only prepared here; actual submission is
 * batched in bdev_uring_group_poll(). Completion is reported asynchronously
 * from bdev_uring_reap().
 * NOTE(review): io_uring_get_sqe() can return NULL when the SQ ring is full;
 * this code does not check for that — relies on QUEUE_DEPTH sizing. Confirm.
 */
static int64_t
bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
		 struct bdev_uring_task *uring_task,
		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/*
 * Queue a writev SQE; mirrors bdev_uring_readv() (same batching/completion
 * path and same unchecked io_uring_get_sqe() caveat).
 */
static int64_t
bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
		  struct bdev_uring_task *uring_task,
		  struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/*
 * bdev fn_table destruct callback: tear down one uring bdev. Closes the fd,
 * unregisters the per-bdev io_device and frees the structure. Returns the
 * close() result (close failure is logged but teardown still proceeds).
 */
static int
bdev_uring_destruct(void *ctx)
{
	struct bdev_uring *uring = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
	rc = bdev_uring_close(uring);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_uring_close() failed\n");
	}
	spdk_io_device_unregister(uring, NULL);
	uring_free_bdev(uring);
	return rc;
}

/*
 * Reap up to 'max' completions from the ring. Each CQE is matched to its
 * bdev_uring_task via user_data; a short or negative result (res != expected
 * length) is reported as SPDK_BDEV_IO_STATUS_FAILED. Returns the number of
 * completions processed, or the negative value from io_uring_peek_cqe()
 * (e.g. -EAGAIN when the CQ is empty).
 */
static int
bdev_uring_reap(struct io_uring *ring, int max)
{
	int i, count, ret;
	struct io_uring_cqe *cqe;
	struct bdev_uring_task *uring_task;
	enum spdk_bdev_io_status status;

	count = 0;
	for (i = 0; i < max; i++) {
		ret = io_uring_peek_cqe(ring, &cqe);
		if (ret != 0) {
			return ret;
		}

		if (cqe == NULL) {
			return count;
		}

		uring_task = (struct bdev_uring_task *)cqe->user_data;
		if (cqe->res != (signed)uring_task->len) {
			status = SPDK_BDEV_IO_STATUS_FAILED;
		} else {
			status = SPDK_BDEV_IO_STATUS_SUCCESS;
		}

		/* Mark the CQE seen before completing the I/O so the bdev
		 * completion callback may safely submit new I/O. */
		uring_task->ch->group_ch->io_inflight--;
		io_uring_cqe_seen(ring, cqe);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
		count++;
	}

	return count;
}

/*
 * Group-channel poller: flush any batched SQEs with one io_uring_submit()
 * call, then reap completions. Returns BUSY if any work was done (submitted
 * or completed), IDLE otherwise.
 */
static int
bdev_uring_group_poll(void *arg)
{
	struct bdev_uring_group_channel *group_ch = arg;
	int to_complete, to_submit;
	int count, ret;

	to_submit = group_ch->io_pending;

	if (to_submit > 0) {
		/* If there are I/O to submit, use io_uring_submit here.
		 * It will automatically call spdk_io_uring_enter appropriately. */
		ret = io_uring_submit(&group_ch->uring);
		if (ret < 0) {
			return SPDK_POLLER_BUSY;
		}

		group_ch->io_pending = 0;
		group_ch->io_inflight += to_submit;
	}

	to_complete = group_ch->io_inflight;
	count = 0;
	if (to_complete > 0) {
		count = bdev_uring_reap(&group_ch->uring, to_complete);
	}

	if (count + to_submit > 0) {
		return SPDK_POLLER_BUSY;
	} else {
		return SPDK_POLLER_IDLE;
	}
}

/*
 * spdk_bdev_io_get_buf() callback: once a (possibly bounce) buffer is
 * available, dispatch the read or write to the uring submission path.
 */
static void
bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		      bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
				 ch,
				 (struct bdev_uring_task *)bdev_io->driver_ctx,
				 bdev_io->u.bdev.iovs,
				 bdev_io->u.bdev.iovcnt,
				 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
				  ch,
				  (struct bdev_uring_task *)bdev_io->driver_ctx,
				  bdev_io->u.bdev.iovs,
				  bdev_io->u.bdev.iovcnt,
				  bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				  bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

#ifdef SPDK_CONFIG_URING_ZNS
/*
 * Read one sysfs attribute (/sys/block/<dev>/<attr>) into 'str', with the
 * trailing newline stripped. Returns 0 on success, -EINVAL/-ENOENT on error.
 * NOTE(review): basename() may modify its argument on some libcs; devname is
 * passed through as const char * here — confirm GNU basename semantics apply.
 */
static int
bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len)
{
	char *path = NULL;
	char *device = NULL;
	FILE *file;
	int ret = 0;

	device = basename(devname);
	path = spdk_sprintf_alloc("/sys/block/%s/%s", device, attr);
	if (!path) {
		return -EINVAL;
	}

	file = fopen(path, "r");
	if (!file) {
		free(path);
		return -ENOENT;
	}

	if (!fgets(str, str_len, file)) {
		ret = -EINVAL;
		goto close;
	}

	spdk_str_chomp(str);

close:
	free(path);
	fclose(file);
	return ret;
}

/* Convenience wrapper: read a sysfs attribute and parse it as a base-10 long. */
static int
bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val)
{
	char str[128];
	int ret;

	ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str));
	if (ret) {
		return ret;
	}

	*val = spdk_strtol(str, 10);

	return 0;
}

/* Translate a kernel blk_zone condition into the SPDK zone state enum. */
static int
bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
{
	switch (zones_rep->cond) {
	case BLK_ZONE_COND_EMPTY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
		break;
	case BLK_ZONE_COND_CLOSED:
		zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
		break;
	case BLK_ZONE_COND_READONLY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
		break;
	case BLK_ZONE_COND_FULL:
		zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
		break;
	case BLK_ZONE_COND_OFFLINE:
		zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
		break;
	default:
		SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
		return -EIO;
	}
	return 0;
}

/*
 * Execute a zone management action (reset/open/close/finish) synchronously
 * via the corresponding BLKxxxZONE ioctl. Zone id and size are converted from
 * logical blocks to 512B sectors with lba_shift. Completes the bdev_io with
 * SUCCESS on ioctl success; returns -EINVAL on failure (caller fails the I/O).
 */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone_range range;
	long unsigned zone_mgmt_op;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		zone_mgmt_op = BLKRESETZONE;
		break;
	case SPDK_BDEV_ZONE_OPEN:
		zone_mgmt_op = BLKOPENZONE;
		break;
	case SPDK_BDEV_ZONE_CLOSE:
		zone_mgmt_op = BLKCLOSEZONE;
		break;
	case SPDK_BDEV_ZONE_FINISH:
		zone_mgmt_op = BLKFINISHZONE;
		break;
	default:
		return -EINVAL;
	}

	range.sector = (zone_id << uring->zd.lba_shift);
	range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);

	if (ioctl(uring->fd, zone_mgmt_op, &range)) {
		SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
			    bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
		return -EINVAL;
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

/*
 * Fill the caller's spdk_bdev_zone_info array via BLKREPORTZONE, converting
 * kernel sector units back to logical blocks with 'shift'. Loops because the
 * kernel may return fewer zones per ioctl than requested. Completes the
 * bdev_io with SUCCESS synchronously.
 */
static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone *zones;
	struct blk_zone_report *rep;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	size_t repsize;
	uint32_t i, shift;
	uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
	shift = uring->zd.lba_shift;

	if ((num_zones > uring->zd.num_zones) || !num_zones) {
		return -EINVAL;
	}

	/* Report buffer: header followed by num_zones zone descriptors. */
	repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
	rep = (struct blk_zone_report *)malloc(repsize);
	if (!rep) {
		return -ENOMEM;
	}

	zones = (struct blk_zone *)(rep + 1);

	while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
		memset(rep, 0, repsize);
		rep->sector = zone_id;
		rep->nr_zones = num_zones;

		if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
			SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
				    errno, strerror(errno));
			free(rep);
			return -EINVAL;
		}

		if (!rep->nr_zones) {
			break;
		}

		for (i = 0; i < rep->nr_zones; i++) {
			zone_info->zone_id = ((zones + i)->start >> shift);
			zone_info->write_pointer = ((zones + i)->wp >> shift);
			zone_info->capacity = ((zones + i)->capacity >> shift);

			bdev_uring_fill_zone_state(zone_info, zones + i);

			/* Advance to the zone following the last one reported. */
			zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
			zone_info++;
			num_zones--;
		}
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	free(rep);
	return 0;
}

/*
 * Probe sysfs/ioctls to determine whether 'filename' is a host-aware or
 * host-managed zoned device; if so, populate uring->zd and the zoned fields
 * of uring->bdev. Returns 0 for non-zoned devices or on success, -1 on any
 * probing failure for a zoned device. Must run after required_alignment is
 * set (lba_shift derives from it).
 */
static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	char str[128];
	long int val = 0;
	uint32_t zinfo;
	int retval = -1;

	uring->bdev.zoned = false;

	/* Check if this is a zoned block device */
	if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) {
		SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno);
	} else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
		/* Only host-aware & host-managed zns devices */
		uring->bdev.zoned = true;

		if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->zd.num_zones = zinfo;

		if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}

		/* BLKGETZONESZ reports 512B sectors; convert to logical blocks. */
		uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
		uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
		uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) {
			SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val;

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) {
			SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_active_zones = (uint32_t)val;
		retval = 0;
	} else {
		retval = 0; /* queue/zoned=none */
	}

err_ret:
	return retval;
}
#else
/* No support for zoned devices */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	return 0;
}
#endif

/*
 * Dispatch one bdev_io by type. Returns 0 if the I/O was accepted (it will be
 * completed asynchronously) or negative on immediate failure.
 */
static int
_bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		return bdev_uring_zone_get_info(bdev_io);
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		return bdev_uring_zone_management_op(bdev_io);
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	default:
		return -1;
	}
}

/* bdev fn_table submit callback: fail the I/O immediately if dispatch fails. */
static void
bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

/* bdev fn_table capability query; zone I/O types only when built with ZNS. */
static bool
bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
#ifdef SPDK_CONFIG_URING_ZNS
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
#endif
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

/* Per-bdev channel create: take a reference on this thread's group channel. */
static int
bdev_uring_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));

	return 0;
}

/* Per-bdev channel destroy: release the group channel reference. */
static void
bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_uring_get_io_channel(void *ctx)
{
	struct bdev_uring *uring = ctx;

	return spdk_get_io_channel(uring);
}

/* Emit this bdev's driver-specific info for `bdev_get_bdevs`-style RPCs. */
static int
bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = ctx;

	spdk_json_write_named_object_begin(w, "uring");

	spdk_json_write_named_string(w, "filename", uring->filename);

	spdk_json_write_object_end(w);

	return 0;
}

/* Emit the bdev_uring_create RPC call that would recreate this bdev. */
static void
bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_uring_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_string(w, "filename", uring->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table uring_fn_table = {
	.destruct		= bdev_uring_destruct,
	.submit_request		= bdev_uring_submit_request,
	.io_type_supported	= bdev_uring_io_type_supported,
	.get_io_channel		= bdev_uring_get_io_channel,
	.dump_info_json		= bdev_uring_dump_info_json,
	.write_config_json	= bdev_uring_write_json_config,
};

/* Free a bdev_uring and its owned strings; NULL-safe. */
static void
uring_free_bdev(struct bdev_uring *uring)
{
	if (uring == NULL) {
		return;
	}
	free(uring->filename);
	free(uring->bdev.name);
	free(uring);
}

/* Group channel create: set up this thread's io_uring and start its poller. */
static int
bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	/* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only
	 * local devices but also devices attached from remote target */
	if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) {
		SPDK_ERRLOG("uring I/O context setup failure\n");
		return -1;
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0);
	return 0;
}

/* Group channel destroy: tear down the ring and stop the poller. */
static void
bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	io_uring_queue_exit(&ch->uring);

	spdk_poller_unregister(&ch->poller);
}

/*
 * Public constructor (RPC entry point): create and register a uring bdev
 * backed by 'filename'. block_size == 0 means auto-detect; an explicit size
 * must be >= the detected size, >= 512, and a power of two. Returns the
 * registered bdev or NULL on failure (all partial state cleaned up).
 */
struct spdk_bdev *
create_uring_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct bdev_uring *uring;
	uint32_t detected_block_size;
	uint64_t bdev_size;
	int rc;

	uring = calloc(1, sizeof(*uring));
	if (!uring) {
		SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
		return NULL;
	}

	uring->filename = strdup(filename);
	if (!uring->filename) {
		goto error_return;
	}

	if (bdev_uring_open(uring)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno);
		goto error_return;
	}

	bdev_size = spdk_fd_get_size(uring->fd);

	uring->bdev.name = strdup(name);
	if (!uring->bdev.name) {
		goto error_return;
	}
	uring->bdev.product_name = "URING bdev";
	uring->bdev.module = &uring_if;

	uring->bdev.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(uring->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			goto error_return;
		}
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	uring->bdev.blocklen = block_size;
	uring->bdev.required_alignment = spdk_u32log2(block_size);

	/* Must follow required_alignment assignment: zoned probing uses it. */
	rc = bdev_uring_check_zoned_support(uring, name, filename);
	if (rc) {
		goto error_return;
	}

	if (bdev_size % uring->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, uring->bdev.blocklen);
		goto error_return;
	}

	uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
	uring->bdev.ctxt = uring;

	uring->bdev.fn_table = &uring_fn_table;

	spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
				sizeof(struct bdev_uring_io_channel),
				uring->bdev.name);
	rc = spdk_bdev_register(&uring->bdev);
	if (rc) {
		spdk_io_device_unregister(uring, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
	return &uring->bdev;

error_return:
	bdev_uring_close(uring);
	uring_free_bdev(uring);
	return NULL;
}

/* Context carried through the async unregister in delete_uring_bdev(). */
struct delete_uring_bdev_ctx {
	spdk_delete_uring_complete	cb_fn;
	void				*cb_arg;
};

/* Completion trampoline: forward the unregister status and free the context. */
static void
uring_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_uring_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

/*
 * Public destructor (RPC entry point): asynchronously unregister the named
 * uring bdev. cb_fn is always invoked exactly once, with -ENOMEM / lookup
 * errors reported through it as well.
 */
void
delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg)
{
	struct delete_uring_bdev_ctx *ctx;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx);
	if (rc != 0) {
		uring_bdev_unregister_cb(ctx, rc);
	}
}

/* Module init: register the module-wide io_device that owns group channels. */
static int
bdev_uring_init(void)
{
	spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb,
				sizeof(struct bdev_uring_group_channel), "uring_module");

	return 0;
}

/* Module fini: unregister the module-wide io_device. */
static void
bdev_uring_fini(void)
{
	spdk_io_device_unregister(&uring_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(uring)