1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2019 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_uring.h" 7 8 #include "spdk/stdinc.h" 9 #include "spdk/config.h" 10 #include "spdk/barrier.h" 11 #include "spdk/bdev.h" 12 #include "spdk/env.h" 13 #include "spdk/fd.h" 14 #include "spdk/likely.h" 15 #include "spdk/thread.h" 16 #include "spdk/json.h" 17 #include "spdk/util.h" 18 #include "spdk/string.h" 19 20 #include "spdk/log.h" 21 #include "spdk_internal/uring.h" 22 23 #ifdef SPDK_CONFIG_URING_ZNS 24 #include <linux/blkzoned.h> 25 #define SECTOR_SHIFT 9 26 #endif 27 28 struct bdev_uring_zoned_dev { 29 uint64_t num_zones; 30 uint32_t zone_shift; 31 uint32_t lba_shift; 32 }; 33 34 struct bdev_uring_io_channel { 35 struct bdev_uring_group_channel *group_ch; 36 }; 37 38 struct bdev_uring_group_channel { 39 uint64_t io_inflight; 40 uint64_t io_pending; 41 struct spdk_poller *poller; 42 struct io_uring uring; 43 }; 44 45 struct bdev_uring_task { 46 uint64_t len; 47 struct bdev_uring_io_channel *ch; 48 TAILQ_ENTRY(bdev_uring_task) link; 49 }; 50 51 struct bdev_uring { 52 struct spdk_bdev bdev; 53 struct bdev_uring_zoned_dev zd; 54 char *filename; 55 int fd; 56 TAILQ_ENTRY(bdev_uring) link; 57 }; 58 59 static int bdev_uring_init(void); 60 static void bdev_uring_fini(void); 61 static void uring_free_bdev(struct bdev_uring *uring); 62 static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head); 63 64 #define SPDK_URING_QUEUE_DEPTH 512 65 #define MAX_EVENTS_PER_POLL 32 66 67 static int 68 bdev_uring_get_ctx_size(void) 69 { 70 return sizeof(struct bdev_uring_task); 71 } 72 73 static struct spdk_bdev_module uring_if = { 74 .name = "uring", 75 .module_init = bdev_uring_init, 76 .module_fini = bdev_uring_fini, 77 .get_ctx_size = bdev_uring_get_ctx_size, 78 }; 79 80 SPDK_BDEV_MODULE_REGISTER(uring, &uring_if) 81 82 static int 83 bdev_uring_open(struct bdev_uring *bdev) 84 { 85 int fd; 86 87 fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME); 88 if (fd < 0) { 89 /* Try without O_DIRECT for non-disk files */ 90 fd = open(bdev->filename, O_RDWR | O_NOATIME); 91 if (fd < 0) { 92 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 93 bdev->filename, errno, spdk_strerror(errno)); 94 bdev->fd = -1; 95 return -1; 96 } 97 } 98 99 bdev->fd = fd; 100 101 return 0; 102 } 103 104 static int 105 bdev_uring_close(struct bdev_uring *bdev) 106 { 107 int rc; 108 109 if (bdev->fd == -1) { 110 return 0; 111 } 112 113 rc = close(bdev->fd); 114 if (rc < 0) { 115 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 116 bdev->fd, errno, spdk_strerror(errno)); 117 return -1; 118 } 119 120 bdev->fd = -1; 121 122 return 0; 123 } 124 125 static int64_t 126 bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch, 127 struct bdev_uring_task *uring_task, 128 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 129 { 130 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 131 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 132 struct io_uring_sqe *sqe; 133 134 sqe = io_uring_get_sqe(&group_ch->uring); 135 io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset); 136 io_uring_sqe_set_data(sqe, uring_task); 137 uring_task->len = nbytes; 138 uring_task->ch = uring_ch; 139 140 SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n", 141 iovcnt, nbytes, offset); 142 143 group_ch->io_pending++; 144 return nbytes; 145 } 146 147 static int64_t 148 bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch, 149 struct bdev_uring_task *uring_task, 150 struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset) 151 { 152 struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch); 153 struct bdev_uring_group_channel *group_ch = uring_ch->group_ch; 154 struct io_uring_sqe *sqe; 155 156 sqe = io_uring_get_sqe(&group_ch->uring); 157 io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset); 158 io_uring_sqe_set_data(sqe, uring_task); 159 uring_task->len = nbytes; 160 uring_task->ch = uring_ch; 161 162 SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n", 163 iovcnt, nbytes, offset); 164 165 group_ch->io_pending++; 166 return nbytes; 167 } 168 169 static int 170 bdev_uring_destruct(void *ctx) 171 { 172 struct bdev_uring *uring = ctx; 173 int rc = 0; 174 175 TAILQ_REMOVE(&g_uring_bdev_head, uring, link); 176 rc = bdev_uring_close(uring); 177 if (rc < 0) { 178 SPDK_ERRLOG("bdev_uring_close() failed\n"); 179 } 180 spdk_io_device_unregister(uring, NULL); 181 uring_free_bdev(uring); 182 return rc; 183 } 184 185 static int 186 bdev_uring_reap(struct io_uring *ring, int max) 187 { 188 int i, count, ret; 189 struct io_uring_cqe *cqe; 190 struct bdev_uring_task *uring_task; 191 enum spdk_bdev_io_status status; 192 193 count = 0; 194 for (i = 0; i < max; i++) { 195 ret = io_uring_peek_cqe(ring, &cqe); 196 if (ret != 0) { 197 return ret; 198 } 199 200 if (cqe == NULL) { 201 return count; 202 } 203 204 uring_task = (struct bdev_uring_task *)cqe->user_data; 205 if (cqe->res != (signed)uring_task->len) { 206 status = SPDK_BDEV_IO_STATUS_FAILED; 207 } else { 208 status = SPDK_BDEV_IO_STATUS_SUCCESS; 209 } 210 211 uring_task->ch->group_ch->io_inflight--; 212 io_uring_cqe_seen(ring, cqe); 213 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status); 214 count++; 215 } 216 217 return count; 218 } 219 220 static int 221 bdev_uring_group_poll(void *arg) 222 { 223 struct bdev_uring_group_channel *group_ch = arg; 224 int to_complete, to_submit; 225 int count, ret; 226 227 to_submit = group_ch->io_pending; 228 229 if (to_submit > 0) { 230 /* If there are I/O to submit, use io_uring_submit here. 231 * It will automatically call spdk_io_uring_enter appropriately. */ 232 ret = io_uring_submit(&group_ch->uring); 233 if (ret < 0) { 234 return SPDK_POLLER_BUSY; 235 } 236 237 group_ch->io_pending = 0; 238 group_ch->io_inflight += to_submit; 239 } 240 241 to_complete = group_ch->io_inflight; 242 count = 0; 243 if (to_complete > 0) { 244 count = bdev_uring_reap(&group_ch->uring, to_complete); 245 } 246 247 if (count + to_submit > 0) { 248 return SPDK_POLLER_BUSY; 249 } else { 250 return SPDK_POLLER_IDLE; 251 } 252 } 253 254 static void 255 bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 256 bool success) 257 { 258 if (!success) { 259 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 260 return; 261 } 262 263 switch (bdev_io->type) { 264 case SPDK_BDEV_IO_TYPE_READ: 265 bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt, 266 ch, 267 (struct bdev_uring_task *)bdev_io->driver_ctx, 268 bdev_io->u.bdev.iovs, 269 bdev_io->u.bdev.iovcnt, 270 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 271 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 272 break; 273 case SPDK_BDEV_IO_TYPE_WRITE: 274 bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt, 275 ch, 276 (struct bdev_uring_task *)bdev_io->driver_ctx, 277 bdev_io->u.bdev.iovs, 278 bdev_io->u.bdev.iovcnt, 279 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 280 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 281 break; 282 default: 283 SPDK_ERRLOG("Wrong io type\n"); 284 break; 285 } 286 } 287 288 #ifdef SPDK_CONFIG_URING_ZNS 289 static int 290 bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len) 291 { 292 char *path = NULL; 293 char *device = NULL; 294 FILE *file; 295 int ret = 0; 296 297 device = basename(devname); 298 path = spdk_sprintf_alloc("/sys/block/%s/%s", device, attr); 299 if (!path) { 300 return -EINVAL; 301 } 302 303 file = fopen(path, "r"); 304 if (!file) { 305 free(path); 306 return -ENOENT; 307 } 308 309 if (!fgets(str, str_len, file)) { 310 ret = -EINVAL; 311 goto close; 312 } 313 314 spdk_str_chomp(str); 315 316 close: 317 free(path); 318 fclose(file); 319 return ret; 320 } 321 322 static int 323 bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val) 324 { 325 char str[128]; 326 int ret; 327 328 ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str)); 329 if (ret) { 330 return ret; 331 } 332 333 *val = spdk_strtol(str, 10); 334 335 return 0; 336 } 337 338 static int 339 bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 340 { 341 switch (zones_rep->type) { 342 case BLK_ZONE_TYPE_CONVENTIONAL: 343 zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV; 344 break; 345 case BLK_ZONE_TYPE_SEQWRITE_REQ: 346 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR; 347 break; 348 case BLK_ZONE_TYPE_SEQWRITE_PREF: 349 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP; 350 break; 351 default: 352 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type); 353 return -EIO; 354 } 355 return 0; 356 } 357 358 static int 359 bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep) 360 { 361 switch (zones_rep->cond) { 362 case BLK_ZONE_COND_EMPTY: 363 zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY; 364 break; 365 case BLK_ZONE_COND_IMP_OPEN: 366 zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN; 367 break; 368 case BLK_ZONE_COND_EXP_OPEN: 369 zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN; 370 break; 371 case BLK_ZONE_COND_CLOSED: 372 zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED; 373 break; 374 case BLK_ZONE_COND_READONLY: 375 zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY; 376 break; 377 case BLK_ZONE_COND_FULL: 378 zone_info->state = SPDK_BDEV_ZONE_STATE_FULL; 379 break; 380 case BLK_ZONE_COND_OFFLINE: 381 zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE; 382 break; 383 case BLK_ZONE_COND_NOT_WP: 384 zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP; 385 break; 386 default: 387 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond); 388 return -EIO; 389 } 390 return 0; 391 } 392 393 static int 394 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 395 { 396 struct bdev_uring *uring; 397 struct blk_zone_range range; 398 long unsigned zone_mgmt_op; 399 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 400 401 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 402 403 switch (bdev_io->u.zone_mgmt.zone_action) { 404 case SPDK_BDEV_ZONE_RESET: 405 zone_mgmt_op = BLKRESETZONE; 406 break; 407 case SPDK_BDEV_ZONE_OPEN: 408 zone_mgmt_op = BLKOPENZONE; 409 break; 410 case SPDK_BDEV_ZONE_CLOSE: 411 zone_mgmt_op = BLKCLOSEZONE; 412 break; 413 case SPDK_BDEV_ZONE_FINISH: 414 zone_mgmt_op = BLKFINISHZONE; 415 break; 416 default: 417 return -EINVAL; 418 } 419 420 range.sector = (zone_id << uring->zd.lba_shift); 421 range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift); 422 423 if (ioctl(uring->fd, zone_mgmt_op, &range)) { 424 SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n", 425 bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno)); 426 return -EINVAL; 427 } 428 429 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 430 431 return 0; 432 } 433 434 static int 435 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 436 { 437 struct bdev_uring *uring; 438 struct blk_zone *zones; 439 struct blk_zone_report *rep; 440 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 441 size_t repsize; 442 uint32_t i, shift; 443 uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones; 444 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 445 446 uring = (struct bdev_uring *)bdev_io->bdev->ctxt; 447 shift = uring->zd.lba_shift; 448 449 if ((num_zones > uring->zd.num_zones) || !num_zones) { 450 return -EINVAL; 451 } 452 453 repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones); 454 rep = (struct blk_zone_report *)malloc(repsize); 455 if (!rep) { 456 return -ENOMEM; 457 } 458 459 zones = (struct blk_zone *)(rep + 1); 460 461 while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) { 462 memset(rep, 0, repsize); 463 rep->sector = zone_id; 464 rep->nr_zones = num_zones; 465 466 if (ioctl(uring->fd, BLKREPORTZONE, rep)) { 467 SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n", 468 errno, strerror(errno)); 469 free(rep); 470 return -EINVAL; 471 } 472 473 if (!rep->nr_zones) { 474 break; 475 } 476 477 for (i = 0; i < rep->nr_zones; i++) { 478 zone_info->zone_id = ((zones + i)->start >> shift); 479 zone_info->write_pointer = ((zones + i)->wp >> shift); 480 zone_info->capacity = ((zones + i)->capacity >> shift); 481 482 bdev_uring_fill_zone_state(zone_info, zones + i); 483 bdev_uring_fill_zone_type(zone_info, zones + i); 484 485 zone_id = ((zones + i)->start + (zones + i)->len) >> shift; 486 zone_info++; 487 num_zones--; 488 } 489 } 490 491 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 492 free(rep); 493 return 0; 494 } 495 496 static int 497 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 498 { 499 char str[128]; 500 long int val = 0; 501 uint32_t zinfo; 502 int retval = -1; 503 504 uring->bdev.zoned = false; 505 506 /* Check if this is a zoned block device */ 507 if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) { 508 SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno); 509 } else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) { 510 /* Only host-aware & host-managed zns devices */ 511 uring->bdev.zoned = true; 512 513 if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) { 514 SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno)); 515 goto err_ret; 516 } 517 uring->zd.num_zones = zinfo; 518 519 if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) { 520 SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno)); 521 goto err_ret; 522 } 523 524 uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT; 525 uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift); 526 uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift); 527 528 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) { 529 SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno)); 530 goto err_ret; 531 } 532 uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val; 533 534 if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) { 535 SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno)); 536 goto err_ret; 537 } 538 uring->bdev.max_active_zones = (uint32_t)val; 539 retval = 0; 540 } else { 541 retval = 0; /* queue/zoned=none */ 542 } 543 544 err_ret: 545 return retval; 546 } 547 #else 548 /* No support for zoned devices */ 549 static int 550 bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io) 551 { 552 return -1; 553 } 554 555 static int 556 bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io) 557 { 558 return -1; 559 } 560 561 static int 562 bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename) 563 { 564 return 0; 565 } 566 #endif 567 568 static int 569 _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 570 { 571 572 switch (bdev_io->type) { 573 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 574 return bdev_uring_zone_get_info(bdev_io); 575 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 576 return bdev_uring_zone_management_op(bdev_io); 577 /* Read and write operations must be performed on buffers aligned to 578 * bdev->required_alignment. If user specified unaligned buffers, 579 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 580 case SPDK_BDEV_IO_TYPE_READ: 581 case SPDK_BDEV_IO_TYPE_WRITE: 582 spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb, 583 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 584 return 0; 585 default: 586 return -1; 587 } 588 } 589 590 static void 591 bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 592 { 593 if (_bdev_uring_submit_request(ch, bdev_io) < 0) { 594 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 595 } 596 } 597 598 static bool 599 bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 600 { 601 switch (io_type) { 602 #ifdef SPDK_CONFIG_URING_ZNS 603 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 604 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 605 #endif 606 case SPDK_BDEV_IO_TYPE_READ: 607 case SPDK_BDEV_IO_TYPE_WRITE: 608 return true; 609 default: 610 return false; 611 } 612 } 613 614 static int 615 bdev_uring_create_cb(void *io_device, void *ctx_buf) 616 { 617 struct bdev_uring_io_channel *ch = ctx_buf; 618 619 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if)); 620 621 return 0; 622 } 623 624 static void 625 bdev_uring_destroy_cb(void *io_device, void *ctx_buf) 626 { 627 struct bdev_uring_io_channel *ch = ctx_buf; 628 629 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 630 } 631 632 static struct spdk_io_channel * 633 bdev_uring_get_io_channel(void *ctx) 634 { 635 struct bdev_uring *uring = ctx; 636 637 return spdk_get_io_channel(uring); 638 } 639 640 static int 641 bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 642 { 643 struct bdev_uring *uring = ctx; 644 645 spdk_json_write_named_object_begin(w, "uring"); 646 647 spdk_json_write_named_string(w, "filename", uring->filename); 648 649 spdk_json_write_object_end(w); 650 651 return 0; 652 } 653 654 static void 655 bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 656 { 657 struct bdev_uring *uring = bdev->ctxt; 658 659 spdk_json_write_object_begin(w); 660 661 spdk_json_write_named_string(w, "method", "bdev_uring_create"); 662 663 spdk_json_write_named_object_begin(w, "params"); 664 spdk_json_write_named_string(w, "name", bdev->name); 665 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 666 spdk_json_write_named_string(w, "filename", uring->filename); 667 spdk_json_write_object_end(w); 668 669 spdk_json_write_object_end(w); 670 } 671 672 static const struct spdk_bdev_fn_table uring_fn_table = { 673 .destruct = bdev_uring_destruct, 674 .submit_request = bdev_uring_submit_request, 675 .io_type_supported = bdev_uring_io_type_supported, 676 .get_io_channel = bdev_uring_get_io_channel, 677 .dump_info_json = bdev_uring_dump_info_json, 678 .write_config_json = bdev_uring_write_json_config, 679 }; 680 681 static void 682 uring_free_bdev(struct bdev_uring *uring) 683 { 684 if (uring == NULL) { 685 return; 686 } 687 free(uring->filename); 688 free(uring->bdev.name); 689 free(uring); 690 } 691 692 static int 693 bdev_uring_group_create_cb(void *io_device, void *ctx_buf) 694 { 695 struct bdev_uring_group_channel *ch = ctx_buf; 696 697 /* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only 698 * local devices but also devices attached from remote target */ 699 if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) { 700 SPDK_ERRLOG("uring I/O context setup failure\n"); 701 return -1; 702 } 703 704 ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0); 705 return 0; 706 } 707 708 static void 709 bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf) 710 { 711 struct bdev_uring_group_channel *ch = ctx_buf; 712 713 io_uring_queue_exit(&ch->uring); 714 715 spdk_poller_unregister(&ch->poller); 716 } 717 718 struct spdk_bdev * 719 create_uring_bdev(const char *name, const char *filename, uint32_t block_size) 720 { 721 struct bdev_uring *uring; 722 uint32_t detected_block_size; 723 uint64_t bdev_size; 724 int rc; 725 726 uring = calloc(1, sizeof(*uring)); 727 if (!uring) { 728 SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n"); 729 return NULL; 730 } 731 732 uring->filename = strdup(filename); 733 if (!uring->filename) { 734 goto error_return; 735 } 736 737 if (bdev_uring_open(uring)) { 738 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno); 739 goto error_return; 740 } 741 742 bdev_size = spdk_fd_get_size(uring->fd); 743 744 uring->bdev.name = strdup(name); 745 if (!uring->bdev.name) { 746 goto error_return; 747 } 748 uring->bdev.product_name = "URING bdev"; 749 uring->bdev.module = &uring_if; 750 751 uring->bdev.write_cache = 1; 752 753 detected_block_size = spdk_fd_get_blocklen(uring->fd); 754 if (block_size == 0) { 755 /* User did not specify block size - use autodetected block size. */ 756 if (detected_block_size == 0) { 757 SPDK_ERRLOG("Block size could not be auto-detected\n"); 758 goto error_return; 759 } 760 block_size = detected_block_size; 761 } else { 762 if (block_size < detected_block_size) { 763 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 764 "auto-detected block size %" PRIu32 "\n", 765 block_size, detected_block_size); 766 goto error_return; 767 } else if (detected_block_size != 0 && block_size != detected_block_size) { 768 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 769 "auto-detected block size %" PRIu32 "\n", 770 block_size, detected_block_size); 771 } 772 } 773 774 if (block_size < 512) { 775 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 776 goto error_return; 777 } 778 779 if (!spdk_u32_is_pow2(block_size)) { 780 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 781 goto error_return; 782 } 783 784 uring->bdev.blocklen = block_size; 785 uring->bdev.required_alignment = spdk_u32log2(block_size); 786 787 rc = bdev_uring_check_zoned_support(uring, name, filename); 788 if (rc) { 789 goto error_return; 790 } 791 792 if (bdev_size % uring->bdev.blocklen != 0) { 793 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 794 bdev_size, uring->bdev.blocklen); 795 goto error_return; 796 } 797 798 uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen; 799 uring->bdev.ctxt = uring; 800 801 uring->bdev.fn_table = &uring_fn_table; 802 803 spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb, 804 sizeof(struct bdev_uring_io_channel), 805 uring->bdev.name); 806 rc = spdk_bdev_register(&uring->bdev); 807 if (rc) { 808 spdk_io_device_unregister(uring, NULL); 809 goto error_return; 810 } 811 812 TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link); 813 return &uring->bdev; 814 815 error_return: 816 bdev_uring_close(uring); 817 uring_free_bdev(uring); 818 return NULL; 819 } 820 821 struct delete_uring_bdev_ctx { 822 spdk_delete_uring_complete cb_fn; 823 void *cb_arg; 824 }; 825 826 static void 827 uring_bdev_unregister_cb(void *arg, int bdeverrno) 828 { 829 struct delete_uring_bdev_ctx *ctx = arg; 830 831 ctx->cb_fn(ctx->cb_arg, bdeverrno); 832 free(ctx); 833 } 834 835 void 836 delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg) 837 { 838 struct delete_uring_bdev_ctx *ctx; 839 int rc; 840 841 ctx = calloc(1, sizeof(*ctx)); 842 if (ctx == NULL) { 843 cb_fn(cb_arg, -ENOMEM); 844 return; 845 } 846 847 ctx->cb_fn = cb_fn; 848 ctx->cb_arg = cb_arg; 849 rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx); 850 if (rc != 0) { 851 uring_bdev_unregister_cb(ctx, rc); 852 } 853 } 854 855 static int 856 bdev_uring_init(void) 857 { 858 spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb, 859 sizeof(struct bdev_uring_group_channel), "uring_module"); 860 861 return 0; 862 } 863 864 static void 865 bdev_uring_fini(void) 866 { 867 spdk_io_device_unregister(&uring_if, NULL); 868 } 869 870 SPDK_LOG_REGISTER_COMPONENT(uring) 871