/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2019 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_uring.h"

#include "spdk/stdinc.h"
#include "spdk/config.h"
#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"
#include "spdk_internal/uring.h"

#ifdef SPDK_CONFIG_URING_ZNS
#include <linux/blkzoned.h>
/* Linux block layer sector size is fixed at 512 bytes (1 << 9). */
#define SECTOR_SHIFT 9
#endif

/* Zoned-device geometry cached once at bdev creation time. */
struct bdev_uring_zoned_dev {
	uint64_t num_zones;	/* total zones, from BLKGETNRZONES */
	uint32_t zone_shift;	/* log2 of zone size in logical blocks */
	uint32_t lba_shift;	/* shift from logical blocks to 512B sectors */
};

/* Per-core I/O channel; all submissions funnel into the shared group ring. */
struct bdev_uring_io_channel {
	struct bdev_uring_group_channel *group_ch;
};

/* One io_uring instance shared by all uring bdevs on a given thread. */
struct bdev_uring_group_channel {
	uint64_t io_inflight;	/* SQEs submitted to the kernel, CQE not yet reaped */
	uint64_t io_pending;	/* SQEs prepared but not yet submitted */
	struct spdk_poller *poller;
	struct io_uring uring;
};

/* Per-I/O driver context, carved out of the spdk_bdev_io by get_ctx_size(). */
struct bdev_uring_task {
	uint64_t len;		/* expected transfer size, compared against cqe->res */
	struct bdev_uring_io_channel *ch;
	TAILQ_ENTRY(bdev_uring_task) link;
};

/* One uring-backed bdev: the SPDK bdev plus the backing file descriptor. */
struct bdev_uring {
	struct spdk_bdev bdev;
	struct bdev_uring_zoned_dev zd;
	char *filename;
	int fd;
	TAILQ_ENTRY(bdev_uring) link;
};

static int bdev_uring_init(void);
static void bdev_uring_fini(void);
static void uring_free_bdev(struct bdev_uring *uring);
static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head);

#define SPDK_URING_QUEUE_DEPTH 512
/* NOTE(review): MAX_EVENTS_PER_POLL is not referenced anywhere in this file. */
#define MAX_EVENTS_PER_POLL 32

/* Tell the bdev layer how much per-I/O context this module needs. */
static int
bdev_uring_get_ctx_size(void)
{
	return sizeof(struct bdev_uring_task);
}

static struct spdk_bdev_module uring_if = {
	.name = "uring",
	.module_init = bdev_uring_init,
	.module_fini = bdev_uring_fini,
	.get_ctx_size = bdev_uring_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)

/* Open the backing file, preferring O_DIRECT; falls back to buffered I/O
 * for files that do not support it. Returns 0 on success, -1 on failure
 * (with bdev->fd set to -1). */
static int
bdev_uring_open(struct bdev_uring *bdev)
{
	int fd;

	fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(bdev->filename, O_RDWR | O_NOATIME);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    bdev->filename, errno, spdk_strerror(errno));
			bdev->fd = -1;
			return -1;
		}
	}

	bdev->fd = fd;

	return 0;
}

/* Close the backing file descriptor; idempotent (no-op when already closed). */
static int
bdev_uring_close(struct bdev_uring *bdev)
{
	int rc;

	if (bdev->fd == -1) {
		return 0;
	}

	rc = close(bdev->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    bdev->fd, errno, spdk_strerror(errno));
		return -1;
	}

	bdev->fd = -1;

	return 0;
}

/* Queue a vectored read on the group ring. The SQE is only prepared here;
 * actual submission happens in bdev_uring_group_poll(). Returns nbytes on
 * success or -ENOMEM when the SQ ring is full (caller retries via NOMEM). */
static int64_t
bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
		 struct bdev_uring_task *uring_task,
		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	if (!sqe) {
		SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
		return -ENOMEM;
	}

	io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/* Queue a vectored write; mirror of bdev_uring_readv().
 * NOTE(review): nbytes is size_t here but uint64_t in the read path --
 * harmless on LP64, but the two signatures could be made consistent. */
static int64_t
bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
		  struct bdev_uring_task *uring_task,
		  struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
{
	struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
	struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(&group_ch->uring);
	if (!sqe) {
		SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
		return -ENOMEM;
	}

	io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
	io_uring_sqe_set_data(sqe, uring_task);
	uring_task->len = nbytes;
	uring_task->ch = uring_ch;

	SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, nbytes, offset);

	group_ch->io_pending++;
	return nbytes;
}

/* bdev fn_table destruct callback: tear down one uring bdev. */
static int
bdev_uring_destruct(void *ctx)
{
	struct bdev_uring *uring = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
	rc = bdev_uring_close(uring);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_uring_close() failed\n");
	}
	/* NOTE(review): spdk_io_device_unregister() may complete asynchronously
	 * on another thread; freeing 'uring' immediately below risks a
	 * use-after-free of the io_device. Confirm against the SPDK thread
	 * library semantics -- the usual pattern is to free from the
	 * unregister callback instead of passing NULL here. */
	spdk_io_device_unregister(uring, NULL);
	uring_free_bdev(uring);
	return rc;
}

/* Reap up to 'max' completions from the ring. Any CQE whose res does not
 * equal the expected length (short transfer or negative errno) is reported
 * to the bdev layer as FAILED. Returns the number of completions consumed,
 * or the error from io_uring_peek_cqe(). */
static int
bdev_uring_reap(struct io_uring *ring, int max)
{
	int i, count, ret;
	struct io_uring_cqe *cqe;
	struct bdev_uring_task *uring_task;
	enum spdk_bdev_io_status status;

	count = 0;
	for (i = 0; i < max; i++) {
		ret = io_uring_peek_cqe(ring, &cqe);
		if (ret != 0) {
			return ret;
		}

		if (cqe == NULL) {
			return count;
		}

		uring_task = (struct bdev_uring_task *)cqe->user_data;
		if (cqe->res != (signed)uring_task->len) {
			status = SPDK_BDEV_IO_STATUS_FAILED;
		} else {
			status = SPDK_BDEV_IO_STATUS_SUCCESS;
		}

		uring_task->ch->group_ch->io_inflight--;
		/* Mark the CQE consumed before completing, so the bdev
		 * completion callback may safely resubmit into the ring. */
		io_uring_cqe_seen(ring, cqe);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
		count++;
	}

	return count;
}

/* Group poller: flush pending SQEs to the kernel, then reap completions.
 * Returns BUSY when any work was done (or submit failed, so it retries
 * on the next poll), IDLE otherwise. */
static int
bdev_uring_group_poll(void *arg)
{
	struct bdev_uring_group_channel *group_ch = arg;
	int to_complete, to_submit;
	int count, ret;

	to_submit = group_ch->io_pending;

	if (to_submit > 0) {
		/* If there are I/O to submit, use io_uring_submit here.
		 * It will automatically call spdk_io_uring_enter appropriately. */
		ret = io_uring_submit(&group_ch->uring);
		if (ret < 0) {
			/* io_pending is deliberately left intact so the
			 * submission is retried on the next poll. */
			return SPDK_POLLER_BUSY;
		}

		group_ch->io_pending = 0;
		group_ch->io_inflight += to_submit;
	}

	to_complete = group_ch->io_inflight;
	count = 0;
	if (to_complete > 0) {
		count = bdev_uring_reap(&group_ch->uring, to_complete);
	}

	if (count + to_submit > 0) {
		return SPDK_POLLER_BUSY;
	} else {
		return SPDK_POLLER_IDLE;
	}
}

/* spdk_bdev_io_get_buf() callback: dispatch the read/write now that an
 * aligned data buffer is guaranteed. -ENOMEM from the submit path is
 * translated to NOMEM so the bdev layer queues a retry. */
static void
bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		      bool success)
{
	int64_t ret = 0;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
				       ch,
				       (struct bdev_uring_task *)bdev_io->driver_ctx,
				       bdev_io->u.bdev.iovs,
				       bdev_io->u.bdev.iovcnt,
				       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
					ch,
					(struct bdev_uring_task *)bdev_io->driver_ctx,
					bdev_io->u.bdev.iovs,
					bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}

	if (ret == -ENOMEM) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
	}
}

#ifdef SPDK_CONFIG_URING_ZNS
/* Read one sysfs attribute of the block device backing 'devname' into str
 * (trailing newline stripped). Returns 0 on success or a negative errno. */
static int
bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len)
{
	char *path = NULL;
	char *device = NULL;
	char *name;
	FILE *file;
	int ret = 0;

	/* basename() may modify its argument, so work on a copy. */
	name = strdup(devname);
	if (name == NULL) {
		return -EINVAL;
	}
	device = basename(name);
	path = spdk_sprintf_alloc("/sys/block/%s/%s", device, attr);
	free(name);
	if (!path) {
		return -EINVAL;
	}

	file = fopen(path, "r");
	if (!file) {
		free(path);
		return -ENOENT;
	}

	if (!fgets(str, str_len, file)) {
		ret = -EINVAL;
		goto close;
	}

	spdk_str_chomp(str);

close:
	free(path);
	fclose(file);
	return ret;
}

/* Read a sysfs attribute and parse it as a base-10 long.
 * NOTE(review): a spdk_strtol() parse failure is not checked here --
 * confirm whether callers rely on *val being valid on return 0. */
static int
bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val)
{
	char str[128];
	int ret;

	ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str));
	if (ret) {
		return ret;
	}

	*val = spdk_strtol(str, 10);

	return 0;
}

/* Map a kernel blk_zone type to the SPDK zone type enum. */
static int
bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
{
	switch (zones_rep->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV;
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
		break;
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP;
		break;
	default:
		SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type);
		return -EIO;
	}
	return 0;
}

/* Map a kernel blk_zone condition to the SPDK zone state enum. */
static int
bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
{
	switch (zones_rep->cond) {
	case BLK_ZONE_COND_EMPTY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
		break;
	case BLK_ZONE_COND_CLOSED:
		zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
		break;
	case BLK_ZONE_COND_READONLY:
		zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
		break;
	case BLK_ZONE_COND_FULL:
		zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
		break;
	case BLK_ZONE_COND_OFFLINE:
		zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
		break;
	case BLK_ZONE_COND_NOT_WP:
		zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP;
		break;
	default:
		SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
		return -EIO;
	}
	return 0;
}

/* Execute a zone management action (reset/open/close/finish) synchronously
 * via the corresponding BLK*ZONE ioctl; completes the bdev_io on success. */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone_range range;
	long unsigned zone_mgmt_op;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		zone_mgmt_op = BLKRESETZONE;
		break;
	case SPDK_BDEV_ZONE_OPEN:
		zone_mgmt_op = BLKOPENZONE;
		break;
	case SPDK_BDEV_ZONE_CLOSE:
		zone_mgmt_op = BLKCLOSEZONE;
		break;
	case SPDK_BDEV_ZONE_FINISH:
		zone_mgmt_op = BLKFINISHZONE;
		break;
	default:
		return -EINVAL;
	}

	/* The ioctl takes 512B sectors; convert from logical blocks. */
	range.sector = (zone_id << uring->zd.lba_shift);
	range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);

	if (ioctl(uring->fd, zone_mgmt_op, &range)) {
		SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
			    bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
		return -EINVAL;
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

/* Fill the caller's spdk_bdev_zone_info array by issuing BLKREPORTZONE,
 * converting kernel sector units to logical blocks via zd.lba_shift. */
static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	struct bdev_uring *uring;
	struct blk_zone *zones;
	struct blk_zone_report *rep;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	size_t repsize;
	uint32_t i, shift;
	uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;

	uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
	shift = uring->zd.lba_shift;

	if ((num_zones > uring->zd.num_zones) || !num_zones) {
		return -EINVAL;
	}

	repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
	rep = (struct blk_zone_report *)malloc(repsize);
	if (!rep) {
		return -ENOMEM;
	}

	/* The zone array immediately follows the report header. */
	zones = (struct blk_zone *)(rep + 1);

	/* Loop because a single BLKREPORTZONE may return fewer zones than
	 * requested; 'zone_id' advances past the zones already consumed. */
	while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
		memset(rep, 0, repsize);
		rep->sector = zone_id;
		rep->nr_zones = num_zones;

		if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
			SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
				    errno, strerror(errno));
			free(rep);
			return -EINVAL;
		}

		if (!rep->nr_zones) {
			break;
		}

		for (i = 0; i < rep->nr_zones; i++) {
			zone_info->zone_id = ((zones + i)->start >> shift);
			zone_info->write_pointer = ((zones + i)->wp >> shift);
			zone_info->capacity = ((zones + i)->capacity >> shift);

			/* NOTE(review): return values of the two fill helpers
			 * are ignored here -- an invalid type/state from the
			 * kernel would go unreported; confirm intent. */
			bdev_uring_fill_zone_state(zone_info, zones + i);
			bdev_uring_fill_zone_type(zone_info, zones + i);

			zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
			zone_info++;
			num_zones--;
		}
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	free(rep);
	return 0;
}

/* Detect whether the backing device is zoned (sysfs queue/zoned) and, if
 * host-aware/host-managed, populate zone geometry and limits on the bdev.
 * Returns 0 for non-zoned devices or fully-probed zoned devices, -1 when a
 * zoned device is detected but probing fails. */
static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	char str[128];
	long int val = 0;
	uint32_t zinfo;
	int retval = -1;

	uring->bdev.zoned = false;

	/* Check if this is a zoned block device */
	if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) {
		SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno);
	} else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
		/* Only host-aware & host-managed zns devices */
		uring->bdev.zoned = true;

		if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->zd.num_zones = zinfo;

		if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
			SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}

		/* required_alignment holds log2(blocklen); subtracting
		 * SECTOR_SHIFT yields the sectors-per-block shift. */
		uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
		uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
		uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) {
			SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val;

		if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) {
			SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno));
			goto err_ret;
		}
		uring->bdev.max_active_zones = (uint32_t)val;
		retval = 0;
	} else {
		retval = 0; /* queue/zoned=none */
	}

err_ret:
	return retval;
}
#else
/* No support for zoned devices */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	return -1;
}

static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	return 0;
}
#endif

/* Dispatch one bdev_io by type; returns negative on unsupported/failed types
 * so the caller can fail the I/O. */
static int
_bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		return bdev_uring_zone_get_info(bdev_io);
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		return bdev_uring_zone_management_op(bdev_io);
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	default:
		return -1;
	}
}

/* bdev fn_table submit_request callback. */
static void
bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

/* bdev fn_table io_type_supported callback; zone ops only with ZNS config. */
static bool
bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
#ifdef SPDK_CONFIG_URING_ZNS
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
#endif
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

/* Per-bdev channel create: take a reference on this thread's group channel. */
static int
bdev_uring_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));

	return 0;
}

/* Per-bdev channel destroy: drop the group channel reference. */
static void
bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_io_channel *ch = ctx_buf;

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

/* bdev fn_table get_io_channel callback. */
static struct spdk_io_channel *
bdev_uring_get_io_channel(void *ctx)
{
	struct bdev_uring *uring = ctx;

	return spdk_get_io_channel(uring);
}

/* bdev fn_table dump_info_json callback: emit driver-specific details. */
static int
bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = ctx;

	spdk_json_write_named_object_begin(w, "uring");

	spdk_json_write_named_string(w, "filename", uring->filename);

	spdk_json_write_object_end(w);

	return 0;
}

/* bdev fn_table write_config_json callback: emit the RPC that recreates
 * this bdev on config save/load. */
static void
bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct bdev_uring *uring = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_uring_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_string(w, "filename", uring->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table uring_fn_table = {
	.destruct = bdev_uring_destruct,
	.submit_request = bdev_uring_submit_request,
	.io_type_supported = bdev_uring_io_type_supported,
	.get_io_channel = bdev_uring_get_io_channel,
	.dump_info_json = bdev_uring_dump_info_json,
	.write_config_json = bdev_uring_write_json_config,
};

/* Release all memory owned by a bdev_uring; NULL-safe. */
static void
uring_free_bdev(struct bdev_uring *uring)
{
	if (uring == NULL) {
		return;
	}
	free(uring->filename);
	free(uring->bdev.name);
	free(uring);
}

/* Group channel create: set up this thread's io_uring and its poller. */
static int
bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	/* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only
	 * local devices but also devices attached from remote target */
	if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) {
		SPDK_ERRLOG("uring I/O context setup failure\n");
		return -1;
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0);
	return 0;
}

/* Group channel destroy: tear down the ring and stop the poller.
 * NOTE(review): the ring is exited before the poller is unregistered; the
 * reverse order would be the conservative choice -- confirm the poller
 * cannot run between these two calls on this thread. */
static void
bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_uring_group_channel *ch = ctx_buf;

	io_uring_queue_exit(&ch->uring);

	spdk_poller_unregister(&ch->poller);
}

/* Create and register a uring bdev over 'filename'.
 * name:       bdev name (copied).
 * filename:   backing file or block device path (copied).
 * block_size: logical block size; 0 means auto-detect from the device.
 * Returns the new spdk_bdev, or NULL on any failure (all partial state is
 * released). */
struct spdk_bdev *
create_uring_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct bdev_uring *uring;
	uint32_t detected_block_size;
	uint64_t bdev_size;
	int rc;

	uring = calloc(1, sizeof(*uring));
	if (!uring) {
		SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
		return NULL;
	}

	uring->filename = strdup(filename);
	if (!uring->filename) {
		goto error_return;
	}

	if (bdev_uring_open(uring)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, uring->fd, errno);
		goto error_return;
	}

	bdev_size = spdk_fd_get_size(uring->fd);

	uring->bdev.name = strdup(name);
	if (!uring->bdev.name) {
		goto error_return;
	}
	uring->bdev.product_name = "URING bdev";
	uring->bdev.module = &uring_if;

	uring->bdev.write_cache = 0;

	detected_block_size = spdk_fd_get_blocklen(uring->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			goto error_return;
		}
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	uring->bdev.blocklen = block_size;
	uring->bdev.required_alignment = spdk_u32log2(block_size);

	rc = bdev_uring_check_zoned_support(uring, name, filename);
	if (rc) {
		goto error_return;
	}

	if (bdev_size % uring->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, uring->bdev.blocklen);
		goto error_return;
	}

	uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
	uring->bdev.ctxt = uring;

	uring->bdev.fn_table = &uring_fn_table;

	spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
				sizeof(struct bdev_uring_io_channel),
				uring->bdev.name);
	rc = spdk_bdev_register(&uring->bdev);
	if (rc) {
		/* NOTE(review): as in bdev_uring_destruct(), the unregister
		 * below may be deferred while error_return frees 'uring'
		 * immediately -- confirm this cannot race. */
		spdk_io_device_unregister(uring, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
	return &uring->bdev;

error_return:
	bdev_uring_close(uring);
	uring_free_bdev(uring);
	return NULL;
}

struct delete_uring_bdev_ctx { 844 spdk_delete_uring_complete cb_fn; 845 void *cb_arg; 846 }; 847 848 static void 849 uring_bdev_unregister_cb(void *arg, int bdeverrno) 850 { 851 struct delete_uring_bdev_ctx *ctx = arg; 852 853 ctx->cb_fn(ctx->cb_arg, bdeverrno); 854 free(ctx); 855 } 856 857 void 858 delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg) 859 { 860 struct delete_uring_bdev_ctx *ctx; 861 int rc; 862 863 ctx = calloc(1, sizeof(*ctx)); 864 if (ctx == NULL) { 865 cb_fn(cb_arg, -ENOMEM); 866 return; 867 } 868 869 ctx->cb_fn = cb_fn; 870 ctx->cb_arg = cb_arg; 871 rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx); 872 if (rc != 0) { 873 uring_bdev_unregister_cb(ctx, rc); 874 } 875 } 876 877 static int 878 bdev_uring_init(void) 879 { 880 spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb, 881 sizeof(struct bdev_uring_group_channel), "uring_module"); 882 883 return 0; 884 } 885 886 static void 887 bdev_uring_fini(void) 888 { 889 spdk_io_device_unregister(&uring_if, NULL); 890 } 891 892 SPDK_LOG_REGISTER_COMPONENT(uring) 893