/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

#include <sys/eventfd.h>
#include <libaio.h>

struct bdev_aio_io_channel {
	uint64_t io_inflight;
	io_context_t io_ctx;
	struct bdev_aio_group_channel *group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel) link;
};

struct bdev_aio_group_channel {
	/* eventfd for io completion notification in interrupt mode.
	 * A negative value (e.g. -1) indicates it is invalid or unused.
	 */
	int efd;
	struct spdk_interrupt *intr;
	struct spdk_poller *poller;
	TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head;
};

struct bdev_aio_task {
	struct iocb iocb;
	uint64_t len;
	struct bdev_aio_io_channel *ch;
};

struct file_disk {
	struct bdev_aio_task *reset_task;
	struct spdk_poller *reset_retry_timer;
	struct spdk_bdev disk;
	char *filename;
	int fd;
	TAILQ_ENTRY(file_disk) link;
	bool block_size_override;
};

/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

#define SPDK_AIO_RING_VERSION	0xa10a10a1

static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}

static struct spdk_bdev_module aio_if = {
	.name = "aio",
	.module_init = bdev_aio_initialize,
	.module_fini = bdev_aio_fini,
	.get_ctx_size = bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)

static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;

	fd = open(disk->filename, O_RDWR | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, O_RDWR);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

static void
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
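	/* Descriptive note: in interrupt mode the group channel owns an eventfd;
	 * binding it to this iocb makes the kernel signal that fd on completion,
	 * waking bdev_aio_group_interrupt() instead of relying on the poller.
	 */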
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

static void
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	if (rc == 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
	}
}

static void
bdev_aio_destruct_cb(void *io_device)
{
	struct file_disk *fdisk = io_device;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}

	aio_free_disk(fdisk);
}

static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;

	spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);

	return 0;
}
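/* Descriptive note: fast-path completion reaping. The io_context_t handle is
 * in practice a pointer to the kernel's completion ring, which is mapped into
 * user space. When the ring layout is one we recognize (SPDK_AIO_RING_VERSION
 * and no incompatible features), completions can be copied straight out of
 * the ring without a syscall; otherwise we fall back to a non-blocking
 * io_getevents() call.
 */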
static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}

static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i, res = 0;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		aio_task->ch->io_inflight--;
		if (events[i].res == aio_task->len) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			/* In the kernel ABI (aio_abi.h), io_event.res is defined as __s64 and
			 * a negative errno is stored in it on error. libaio.h, however,
			 * declares io_event.res as unsigned long, so convert it to a signed
			 * value for error detection.
			 */
			SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
			res = (int)events[i].res;
			if (res < 0) {
				spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
			} else {
				spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			}
		}
	}

	return nr;
}

static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}
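/* Descriptive note: interrupt-mode wakeup path. Reading the eventfd atomically
 * fetches and resets the kernel's completion counter; e.g. if 200 I/Os have
 * completed since the last wakeup, read() returns 200 and the counter becomes
 * 0. Since one poll pass reaps at most SPDK_AIO_QUEUE_DEPTH events per io
 * channel, any excess count is written back below so the eventfd stays armed
 * and this handler runs again.
 */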
static int
bdev_aio_group_interrupt(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	int rc;
	uint64_t num_events;

	assert(group_ch->efd >= 0);

	/* If the number of completed I/Os is larger than SPDK_AIO_QUEUE_DEPTH,
	 * io_getevents must be called again to ensure all completed I/Os are processed.
	 */
	rc = read(group_ch->efd, &num_events, sizeof(num_events));
	if (rc < 0) {
		SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
		return -errno;
	}

	if (num_events > SPDK_AIO_QUEUE_DEPTH) {
		num_events -= SPDK_AIO_QUEUE_DEPTH;
		rc = write(group_ch->efd, &num_events, sizeof(num_events));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
		}
	}

	return bdev_aio_group_poll(group_ch);
}

static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}
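/* Descriptive note: completion of spdk_bdev_io_get_buf(). By the time this
 * callback runs, the bdev layer has supplied iovs aligned to
 * disk.required_alignment (using a bounce buffer when the caller's buffers
 * were unaligned), so the I/O can be handed to libaio directly even when the
 * file was opened with O_DIRECT.
 */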
static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

static int
_bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}

static void
bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}

static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
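/* Descriptive note: the config entry emitted above replays bdev creation on
 * startup. For a hypothetical bdev "aio0" backed by /dev/sdb with an
 * overridden block size, the generated JSON would look like:
 *
 *   {
 *     "method": "bdev_aio_create",
 *     "params": {
 *       "name": "aio0",
 *       "block_size": 4096,
 *       "filename": "/dev/sdb"
 *     }
 *   }
 */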
static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct = bdev_aio_destruct,
	.submit_request = bdev_aio_submit_request,
	.io_type_supported = bdev_aio_io_type_supported,
	.get_io_channel = bdev_aio_get_io_channel,
	.dump_info_json = bdev_aio_dump_info_json,
	.write_config_json = bdev_aio_write_json_config,
};

static void
aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}

static int
bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
{
	int efd;

	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (efd < 0) {
		return -1;
	}

	ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
	if (ch->intr == NULL) {
		close(efd);
		return -1;
	}
	ch->efd = efd;

	return 0;
}

static void
bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
{
	spdk_interrupt_unregister(&ch->intr);
	close(ch->efd);
	ch->efd = -1;
}

static void
bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
{
	return;
}

static int
bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;
	int rc;

	TAILQ_INIT(&ch->io_ch_head);
	/* Initialize ch->efd to be invalid and unused. */
	ch->efd = -1;
	if (spdk_interrupt_mode_is_enabled()) {
		rc = bdev_aio_register_interrupt(ch);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n");
			return rc;
		}
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL);

	return 0;
}

static void
bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
	}

	spdk_poller_unregister(&ch->poller);
	if (spdk_interrupt_mode_is_enabled()) {
		bdev_aio_unregister_interrupt(ch);
	}
}
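/* Descriptive note: creates the aio bdev for a file or block device.
 * block_size == 0 means "use the size reported by the backing file
 * descriptor". A caller-supplied override must be a power of two, at least
 * 512 bytes, and no smaller than the detected size: e.g. exposing 4096-byte
 * logical blocks on a 512-byte device is accepted (with required_alignment
 * still derived from the detected 512-byte size), while 512 on a 4096-byte
 * device is rejected.
 */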
int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
		rc = -ENOMEM;
		goto error_return;
	}

	if (bdev_aio_open(fdisk)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
		rc = -errno;
		goto error_return;
	}

	disk_size = spdk_fd_get_size(fdisk->fd);

	fdisk->disk.name = strdup(name);
	if (!fdisk->disk.name) {
		rc = -ENOMEM;
		goto error_return;
	}
	fdisk->disk.product_name = "AIO disk";
	fdisk->disk.module = &aio_if;

	fdisk->disk.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			rc = -EINVAL;
			goto error_return;
		}
		fdisk->block_size_override = false;
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			rc = -EINVAL;
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
		fdisk->block_size_override = true;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blocklen = block_size;
	if (fdisk->block_size_override && detected_block_size) {
		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
	} else {
		fdisk->disk.required_alignment = spdk_u32log2(block_size);
	}

	if (disk_size % fdisk->disk.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    disk_size, fdisk->disk.blocklen);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
	fdisk->disk.ctxt = fdisk;

	fdisk->disk.fn_table = &aio_fn_table;

	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
				sizeof(struct bdev_aio_io_channel),
				fdisk->disk.name);
	rc = spdk_bdev_register(&fdisk->disk);
	if (rc) {
		spdk_io_device_unregister(fdisk, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
	return 0;

error_return:
	bdev_aio_close(fdisk);
	aio_free_disk(fdisk);
	return rc;
}

static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

int
bdev_aio_rescan(const char *name)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct file_disk *fdisk;
	uint64_t disk_size, blockcnt;
	int rc;

	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);
	if (bdev->module != &aio_if) {
		rc = -ENODEV;
		goto exit;
	}

	fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk);
	disk_size = spdk_fd_get_size(fdisk->fd);
	blockcnt = disk_size / bdev->blocklen;

	if (bdev->blockcnt != blockcnt) {
		SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64 ", new block count %"
			       PRIu64 "\n",
			       fdisk->filename,
			       bdev->blockcnt,
			       blockcnt);
		rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
		if (rc != 0) {
			SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n",
				    fdisk->filename, rc);
			goto exit;
		}
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}
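/* Descriptive note: deletion is asynchronous. bdev_aio_delete() below hands a
 * small context to spdk_bdev_unregister_by_name(), which verifies the bdev
 * belongs to this module (&aio_if) and invokes aio_bdev_unregister_cb() with
 * the final status once the unregister completes (or immediately on lookup
 * failure).
 */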
struct delete_aio_bdev_ctx {
	delete_aio_bdev_complete cb_fn;
	void *cb_arg;
};

static void
aio_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_aio_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg)
{
	struct delete_aio_bdev_ctx *ctx;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx);
	if (rc != 0) {
		aio_bdev_unregister_cb(ctx, rc);
	}
}

static int
bdev_aio_initialize(void)
{
	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
				sizeof(struct bdev_aio_group_channel), "aio_module");

	return 0;
}

static void
bdev_aio_fini(void)
{
	spdk_io_device_unregister(&aio_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(aio)