1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2017 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_aio.h" 8 9 #include "spdk/stdinc.h" 10 11 #include "spdk/barrier.h" 12 #include "spdk/bdev.h" 13 #include "spdk/bdev_module.h" 14 #include "spdk/env.h" 15 #include "spdk/fd.h" 16 #include "spdk/likely.h" 17 #include "spdk/thread.h" 18 #include "spdk/json.h" 19 #include "spdk/util.h" 20 #include "spdk/string.h" 21 22 #include "spdk/log.h" 23 24 #include <sys/eventfd.h> 25 #include <libaio.h> 26 27 struct bdev_aio_io_channel { 28 uint64_t io_inflight; 29 io_context_t io_ctx; 30 struct bdev_aio_group_channel *group_ch; 31 TAILQ_ENTRY(bdev_aio_io_channel) link; 32 }; 33 34 struct bdev_aio_group_channel { 35 /* eventfd for io completion notification in interrupt mode. 36 * Negative value like '-1' indicates it is invalid or unused. 37 */ 38 int efd; 39 struct spdk_interrupt *intr; 40 struct spdk_poller *poller; 41 TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head; 42 }; 43 44 struct bdev_aio_task { 45 struct iocb iocb; 46 uint64_t len; 47 struct bdev_aio_io_channel *ch; 48 }; 49 50 struct file_disk { 51 struct bdev_aio_task *reset_task; 52 struct spdk_poller *reset_retry_timer; 53 struct spdk_bdev disk; 54 char *filename; 55 int fd; 56 TAILQ_ENTRY(file_disk) link; 57 bool block_size_override; 58 bool readonly; 59 }; 60 61 /* For user space reaping of completions */ 62 struct spdk_aio_ring { 63 uint32_t id; 64 uint32_t size; 65 uint32_t head; 66 uint32_t tail; 67 68 uint32_t version; 69 uint32_t compat_features; 70 uint32_t incompat_features; 71 uint32_t header_length; 72 }; 73 74 #define SPDK_AIO_RING_VERSION 0xa10a10a1 75 76 static int bdev_aio_initialize(void); 77 static void bdev_aio_fini(void); 78 static void aio_free_disk(struct file_disk *fdisk); 79 static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head); 80 81 #define SPDK_AIO_QUEUE_DEPTH 128 82 #define MAX_EVENTS_PER_POLL 32 83 84 static int 85 bdev_aio_get_ctx_size(void) 86 { 87 return sizeof(struct bdev_aio_task); 88 } 89 90 static struct spdk_bdev_module aio_if = { 91 .name = "aio", 92 .module_init = bdev_aio_initialize, 93 .module_fini = bdev_aio_fini, 94 .get_ctx_size = bdev_aio_get_ctx_size, 95 }; 96 97 SPDK_BDEV_MODULE_REGISTER(aio, &aio_if) 98 99 static int 100 bdev_aio_open(struct file_disk *disk) 101 { 102 int fd; 103 int io_flag = disk->readonly ? O_RDONLY : O_RDWR; 104 105 fd = open(disk->filename, io_flag | O_DIRECT); 106 if (fd < 0) { 107 /* Try without O_DIRECT for non-disk files */ 108 fd = open(disk->filename, io_flag); 109 if (fd < 0) { 110 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 111 disk->filename, errno, spdk_strerror(errno)); 112 disk->fd = -1; 113 return -1; 114 } 115 } 116 117 disk->fd = fd; 118 119 return 0; 120 } 121 122 static int 123 bdev_aio_close(struct file_disk *disk) 124 { 125 int rc; 126 127 if (disk->fd == -1) { 128 return 0; 129 } 130 131 rc = close(disk->fd); 132 if (rc < 0) { 133 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 134 disk->fd, errno, spdk_strerror(errno)); 135 return -1; 136 } 137 138 disk->fd = -1; 139 140 return 0; 141 } 142 143 static void 144 bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch, 145 struct bdev_aio_task *aio_task, 146 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 147 { 148 struct iocb *iocb = &aio_task->iocb; 149 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 150 int rc; 151 152 io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset); 153 if (aio_ch->group_ch->efd >= 0) { 154 io_set_eventfd(iocb, aio_ch->group_ch->efd); 155 } 156 iocb->data = aio_task; 157 aio_task->len = nbytes; 158 aio_task->ch = aio_ch; 159 160 SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n", 161 iovcnt, nbytes, offset); 162 163 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 164 if (spdk_unlikely(rc < 0)) { 165 if (rc == -EAGAIN) { 166 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 167 } else { 168 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc); 169 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 170 } 171 } else { 172 aio_ch->io_inflight++; 173 } 174 } 175 176 static void 177 bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch, 178 struct bdev_aio_task *aio_task, 179 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 180 { 181 struct iocb *iocb = &aio_task->iocb; 182 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 183 int rc; 184 185 io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset); 186 if (aio_ch->group_ch->efd >= 0) { 187 io_set_eventfd(iocb, aio_ch->group_ch->efd); 188 } 189 iocb->data = aio_task; 190 aio_task->len = len; 191 aio_task->ch = aio_ch; 192 193 SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n", 194 iovcnt, len, offset); 195 196 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 197 if (spdk_unlikely(rc < 0)) { 198 if (rc == -EAGAIN) { 199 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 200 } else { 201 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc); 202 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 203 } 204 } else { 205 aio_ch->io_inflight++; 206 } 207 } 208 209 static void 210 bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 211 { 212 int rc = fsync(fdisk->fd); 213 214 if (rc == 0) { 215 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS); 216 } else { 217 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno); 218 } 219 } 220 221 static void 222 bdev_aio_destruct_cb(void *io_device) 223 { 224 struct file_disk *fdisk = io_device; 225 int rc = 0; 226 227 TAILQ_REMOVE(&g_aio_disk_head, fdisk, link); 228 rc = bdev_aio_close(fdisk); 229 if (rc < 0) { 230 SPDK_ERRLOG("bdev_aio_close() failed\n"); 231 } 232 aio_free_disk(fdisk); 233 } 234 235 static int 236 bdev_aio_destruct(void *ctx) 237 { 238 struct file_disk *fdisk = ctx; 239 240 spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb); 241 242 return 0; 243 } 244 245 static int 246 bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents) 247 { 248 uint32_t head, tail, count; 249 struct spdk_aio_ring *ring; 250 struct timespec timeout; 251 struct io_event *kevents; 252 253 ring = (struct spdk_aio_ring *)io_ctx; 254 255 if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) { 256 timeout.tv_sec = 0; 257 timeout.tv_nsec = 0; 258 259 return io_getevents(io_ctx, 0, max, uevents, &timeout); 260 } 261 262 /* Read the current state out of the ring */ 263 head = ring->head; 264 tail = ring->tail; 265 266 /* This memory barrier is required to prevent the loads above 267 * from being re-ordered with stores to the events array 268 * potentially occurring on other threads. */ 269 spdk_smp_rmb(); 270 271 /* Calculate how many items are in the circular ring */ 272 count = tail - head; 273 if (tail < head) { 274 count += ring->size; 275 } 276 277 /* Reduce the count to the limit provided by the user */ 278 count = spdk_min(max, count); 279 280 /* Grab the memory location of the event array */ 281 kevents = (struct io_event *)((uintptr_t)ring + ring->header_length); 282 283 /* Copy the events out of the ring. */ 284 if ((head + count) <= ring->size) { 285 /* Only one copy is required */ 286 memcpy(uevents, &kevents[head], count * sizeof(struct io_event)); 287 } else { 288 uint32_t first_part = ring->size - head; 289 /* Two copies are required */ 290 memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event)); 291 memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event)); 292 } 293 294 /* Update the head pointer. On x86, stores will not be reordered with older loads, 295 * so the copies out of the event array will always be complete prior to this 296 * update becoming visible. On other architectures this is not guaranteed, so 297 * add a barrier. */ 298 #if defined(__i386__) || defined(__x86_64__) 299 spdk_compiler_barrier(); 300 #else 301 spdk_smp_mb(); 302 #endif 303 ring->head = (head + count) % ring->size; 304 305 return count; 306 } 307 308 static int 309 bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch) 310 { 311 int nr, i, res = 0; 312 struct bdev_aio_task *aio_task; 313 struct io_event events[SPDK_AIO_QUEUE_DEPTH]; 314 315 nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events); 316 if (nr < 0) { 317 return 0; 318 } 319 320 for (i = 0; i < nr; i++) { 321 aio_task = events[i].data; 322 aio_task->ch->io_inflight--; 323 if (events[i].res == aio_task->len) { 324 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS); 325 } else { 326 /* From aio_abi.h, io_event.res is defined __s64, negative errno 327 * will be assigned to io_event.res for error situation. 328 * But from libaio.h, io_event.res is defined unsigned long, so 329 * convert it to signed value for error detection. 330 */ 331 SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res); 332 res = (int)events[i].res; 333 if (res < 0) { 334 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res); 335 } else { 336 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); 337 } 338 } 339 } 340 341 return nr; 342 } 343 344 static int 345 bdev_aio_group_poll(void *arg) 346 { 347 struct bdev_aio_group_channel *group_ch = arg; 348 struct bdev_aio_io_channel *io_ch; 349 int nr = 0; 350 351 TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) { 352 nr += bdev_aio_io_channel_poll(io_ch); 353 } 354 355 return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 356 } 357 358 static int 359 bdev_aio_group_interrupt(void *arg) 360 { 361 struct bdev_aio_group_channel *group_ch = arg; 362 int rc; 363 uint64_t num_events; 364 365 assert(group_ch->efd >= 0); 366 367 /* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH, 368 * io_getevent should be called again to ensure all completed IO are processed. 369 */ 370 rc = read(group_ch->efd, &num_events, sizeof(num_events)); 371 if (rc < 0) { 372 SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno)); 373 return -errno; 374 } 375 376 if (num_events > SPDK_AIO_QUEUE_DEPTH) { 377 num_events -= SPDK_AIO_QUEUE_DEPTH; 378 rc = write(group_ch->efd, &num_events, sizeof(num_events)); 379 if (rc < 0) { 380 SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno)); 381 } 382 } 383 384 return bdev_aio_group_poll(group_ch); 385 } 386 387 static void 388 _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i) 389 { 390 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 391 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 392 393 if (aio_ch->io_inflight) { 394 spdk_for_each_channel_continue(i, -1); 395 return; 396 } 397 398 spdk_for_each_channel_continue(i, 0); 399 } 400 401 static int bdev_aio_reset_retry_timer(void *arg); 402 403 static void 404 _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) 405 { 406 struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i); 407 408 if (status == -1) { 409 fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500); 410 return; 411 } 412 413 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); 414 } 415 416 static int 417 bdev_aio_reset_retry_timer(void *arg) 418 { 419 struct file_disk *fdisk = arg; 420 421 if (fdisk->reset_retry_timer) { 422 spdk_poller_unregister(&fdisk->reset_retry_timer); 423 } 424 425 spdk_for_each_channel(fdisk, 426 _bdev_aio_get_io_inflight, 427 fdisk, 428 _bdev_aio_get_io_inflight_done); 429 430 return SPDK_POLLER_BUSY; 431 } 432 433 static void 434 bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 435 { 436 fdisk->reset_task = aio_task; 437 438 bdev_aio_reset_retry_timer(fdisk); 439 } 440 441 static void 442 bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 443 bool success) 444 { 445 if (!success) { 446 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 447 return; 448 } 449 450 switch (bdev_io->type) { 451 case SPDK_BDEV_IO_TYPE_READ: 452 bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt, 453 ch, 454 (struct bdev_aio_task *)bdev_io->driver_ctx, 455 bdev_io->u.bdev.iovs, 456 bdev_io->u.bdev.iovcnt, 457 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 458 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 459 break; 460 case SPDK_BDEV_IO_TYPE_WRITE: 461 bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt, 462 ch, 463 (struct bdev_aio_task *)bdev_io->driver_ctx, 464 bdev_io->u.bdev.iovs, 465 bdev_io->u.bdev.iovcnt, 466 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 467 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 468 break; 469 default: 470 SPDK_ERRLOG("Wrong io type\n"); 471 break; 472 } 473 } 474 475 static int 476 _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 477 { 478 struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt; 479 480 switch (bdev_io->type) { 481 /* Read and write operations must be performed on buffers aligned to 482 * bdev->required_alignment. If user specified unaligned buffers, 483 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 484 case SPDK_BDEV_IO_TYPE_READ: 485 spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb, 486 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 487 return 0; 488 case SPDK_BDEV_IO_TYPE_WRITE: 489 if (fdisk->readonly) { 490 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 491 } else { 492 spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb, 493 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 494 } 495 return 0; 496 497 case SPDK_BDEV_IO_TYPE_FLUSH: 498 bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt, 499 (struct bdev_aio_task *)bdev_io->driver_ctx); 500 return 0; 501 502 case SPDK_BDEV_IO_TYPE_RESET: 503 bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt, 504 (struct bdev_aio_task *)bdev_io->driver_ctx); 505 return 0; 506 default: 507 return -1; 508 } 509 } 510 511 static void 512 bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 513 { 514 if (_bdev_aio_submit_request(ch, bdev_io) < 0) { 515 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 516 } 517 } 518 519 static bool 520 bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 521 { 522 switch (io_type) { 523 case SPDK_BDEV_IO_TYPE_READ: 524 case SPDK_BDEV_IO_TYPE_WRITE: 525 case SPDK_BDEV_IO_TYPE_FLUSH: 526 case SPDK_BDEV_IO_TYPE_RESET: 527 return true; 528 529 default: 530 return false; 531 } 532 } 533 534 static int 535 bdev_aio_create_cb(void *io_device, void *ctx_buf) 536 { 537 struct bdev_aio_io_channel *ch = ctx_buf; 538 539 if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { 540 SPDK_ERRLOG("Async I/O context setup failure, likely due to exceeding kernel limit.\n"); 541 SPDK_ERRLOG("This limit may be increased using 'sysctl -w fs.aio-max-nr'.\n"); 542 return -1; 543 } 544 545 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if)); 546 TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link); 547 548 return 0; 549 } 550 551 static void 552 bdev_aio_destroy_cb(void *io_device, void *ctx_buf) 553 { 554 struct bdev_aio_io_channel *ch = ctx_buf; 555 556 io_destroy(ch->io_ctx); 557 558 assert(ch->group_ch); 559 TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link); 560 561 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 562 } 563 564 static struct spdk_io_channel * 565 bdev_aio_get_io_channel(void *ctx) 566 { 567 struct file_disk *fdisk = ctx; 568 569 return spdk_get_io_channel(fdisk); 570 } 571 572 573 static int 574 bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 575 { 576 struct file_disk *fdisk = ctx; 577 578 spdk_json_write_named_object_begin(w, "aio"); 579 580 spdk_json_write_named_string(w, "filename", fdisk->filename); 581 582 spdk_json_write_named_bool(w, "block_size_override", fdisk->block_size_override); 583 584 spdk_json_write_named_bool(w, "readonly", fdisk->readonly); 585 586 spdk_json_write_object_end(w); 587 588 return 0; 589 } 590 591 static void 592 bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 593 { 594 struct file_disk *fdisk = bdev->ctxt; 595 596 spdk_json_write_object_begin(w); 597 598 spdk_json_write_named_string(w, "method", "bdev_aio_create"); 599 600 spdk_json_write_named_object_begin(w, "params"); 601 spdk_json_write_named_string(w, "name", bdev->name); 602 if (fdisk->block_size_override) { 603 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 604 } 605 spdk_json_write_named_string(w, "filename", fdisk->filename); 606 spdk_json_write_named_bool(w, "readonly", fdisk->readonly); 607 spdk_json_write_object_end(w); 608 609 spdk_json_write_object_end(w); 610 } 611 612 static const struct spdk_bdev_fn_table aio_fn_table = { 613 .destruct = bdev_aio_destruct, 614 .submit_request = bdev_aio_submit_request, 615 .io_type_supported = bdev_aio_io_type_supported, 616 .get_io_channel = bdev_aio_get_io_channel, 617 .dump_info_json = bdev_aio_dump_info_json, 618 .write_config_json = bdev_aio_write_json_config, 619 }; 620 621 static void 622 aio_free_disk(struct file_disk *fdisk) 623 { 624 if (fdisk == NULL) { 625 return; 626 } 627 free(fdisk->filename); 628 free(fdisk->disk.name); 629 free(fdisk); 630 } 631 632 static int 633 bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch) 634 { 635 int efd; 636 637 efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 638 if (efd < 0) { 639 return -1; 640 } 641 642 ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch); 643 if (ch->intr == NULL) { 644 close(efd); 645 return -1; 646 } 647 ch->efd = efd; 648 649 return 0; 650 } 651 652 static void 653 bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch) 654 { 655 spdk_interrupt_unregister(&ch->intr); 656 close(ch->efd); 657 ch->efd = -1; 658 } 659 660 static void 661 bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode) 662 { 663 return; 664 } 665 666 static int 667 bdev_aio_group_create_cb(void *io_device, void *ctx_buf) 668 { 669 struct bdev_aio_group_channel *ch = ctx_buf; 670 int rc; 671 672 TAILQ_INIT(&ch->io_ch_head); 673 /* Initialize ch->efd to be invalid and unused. */ 674 ch->efd = -1; 675 if (spdk_interrupt_mode_is_enabled()) { 676 rc = bdev_aio_register_interrupt(ch); 677 if (rc < 0) { 678 SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n"); 679 return rc; 680 } 681 } 682 683 ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0); 684 spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL); 685 686 return 0; 687 } 688 689 static void 690 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) 691 { 692 struct bdev_aio_group_channel *ch = ctx_buf; 693 694 if (!TAILQ_EMPTY(&ch->io_ch_head)) { 695 SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n"); 696 } 697 698 spdk_poller_unregister(&ch->poller); 699 if (spdk_interrupt_mode_is_enabled()) { 700 bdev_aio_unregister_interrupt(ch); 701 } 702 } 703 704 int 705 create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly) 706 { 707 struct file_disk *fdisk; 708 uint32_t detected_block_size; 709 uint64_t disk_size; 710 int rc; 711 712 fdisk = calloc(1, sizeof(*fdisk)); 713 if (!fdisk) { 714 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n"); 715 return -ENOMEM; 716 } 717 fdisk->readonly = readonly; 718 719 fdisk->filename = strdup(filename); 720 if (!fdisk->filename) { 721 rc = -ENOMEM; 722 goto error_return; 723 } 724 725 if (bdev_aio_open(fdisk)) { 726 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno); 727 rc = -errno; 728 goto error_return; 729 } 730 731 disk_size = spdk_fd_get_size(fdisk->fd); 732 733 fdisk->disk.name = strdup(name); 734 if (!fdisk->disk.name) { 735 rc = -ENOMEM; 736 goto error_return; 737 } 738 fdisk->disk.product_name = "AIO disk"; 739 fdisk->disk.module = &aio_if; 740 741 fdisk->disk.write_cache = 1; 742 743 detected_block_size = spdk_fd_get_blocklen(fdisk->fd); 744 if (block_size == 0) { 745 /* User did not specify block size - use autodetected block size. */ 746 if (detected_block_size == 0) { 747 SPDK_ERRLOG("Block size could not be auto-detected\n"); 748 rc = -EINVAL; 749 goto error_return; 750 } 751 fdisk->block_size_override = false; 752 block_size = detected_block_size; 753 } else { 754 if (block_size < detected_block_size) { 755 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 756 "auto-detected block size %" PRIu32 "\n", 757 block_size, detected_block_size); 758 rc = -EINVAL; 759 goto error_return; 760 } else if (detected_block_size != 0 && block_size != detected_block_size) { 761 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 762 "auto-detected block size %" PRIu32 "\n", 763 block_size, detected_block_size); 764 } 765 fdisk->block_size_override = true; 766 } 767 768 if (block_size < 512) { 769 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 770 rc = -EINVAL; 771 goto error_return; 772 } 773 774 if (!spdk_u32_is_pow2(block_size)) { 775 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 776 rc = -EINVAL; 777 goto error_return; 778 } 779 780 fdisk->disk.blocklen = block_size; 781 if (fdisk->block_size_override && detected_block_size) { 782 fdisk->disk.required_alignment = spdk_u32log2(detected_block_size); 783 } else { 784 fdisk->disk.required_alignment = spdk_u32log2(block_size); 785 } 786 787 if (disk_size % fdisk->disk.blocklen != 0) { 788 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 789 disk_size, fdisk->disk.blocklen); 790 rc = -EINVAL; 791 goto error_return; 792 } 793 794 fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen; 795 fdisk->disk.ctxt = fdisk; 796 797 fdisk->disk.fn_table = &aio_fn_table; 798 799 spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb, 800 sizeof(struct bdev_aio_io_channel), 801 fdisk->disk.name); 802 rc = spdk_bdev_register(&fdisk->disk); 803 if (rc) { 804 spdk_io_device_unregister(fdisk, NULL); 805 goto error_return; 806 } 807 808 TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link); 809 return 0; 810 811 error_return: 812 bdev_aio_close(fdisk); 813 aio_free_disk(fdisk); 814 return rc; 815 } 816 817 static void 818 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 819 { 820 } 821 822 int 823 bdev_aio_rescan(const char *name) 824 { 825 struct spdk_bdev_desc *desc; 826 struct spdk_bdev *bdev; 827 struct file_disk *fdisk; 828 uint64_t disk_size, blockcnt; 829 int rc; 830 831 rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc); 832 if (rc != 0) { 833 return rc; 834 } 835 836 bdev = spdk_bdev_desc_get_bdev(desc); 837 if (bdev->module != &aio_if) { 838 rc = -ENODEV; 839 goto exit; 840 } 841 842 fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk); 843 disk_size = spdk_fd_get_size(fdisk->fd); 844 blockcnt = disk_size / bdev->blocklen; 845 846 if (bdev->blockcnt != blockcnt) { 847 SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64 ", new block count %" 848 PRIu64 "\n", 849 fdisk->filename, 850 bdev->blockcnt, 851 blockcnt); 852 rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt); 853 if (rc != 0) { 854 SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n", 855 fdisk->filename, rc); 856 goto exit; 857 } 858 } 859 860 exit: 861 spdk_bdev_close(desc); 862 return rc; 863 } 864 865 struct delete_aio_bdev_ctx { 866 delete_aio_bdev_complete cb_fn; 867 void *cb_arg; 868 }; 869 870 static void 871 aio_bdev_unregister_cb(void *arg, int bdeverrno) 872 { 873 struct delete_aio_bdev_ctx *ctx = arg; 874 875 ctx->cb_fn(ctx->cb_arg, bdeverrno); 876 free(ctx); 877 } 878 879 void 880 bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg) 881 { 882 struct delete_aio_bdev_ctx *ctx; 883 int rc; 884 885 ctx = calloc(1, sizeof(*ctx)); 886 if (ctx == NULL) { 887 cb_fn(cb_arg, -ENOMEM); 888 return; 889 } 890 891 ctx->cb_fn = cb_fn; 892 ctx->cb_arg = cb_arg; 893 rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx); 894 if (rc != 0) { 895 aio_bdev_unregister_cb(ctx, rc); 896 } 897 } 898 899 static int 900 bdev_aio_initialize(void) 901 { 902 spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb, 903 sizeof(struct bdev_aio_group_channel), "aio_module"); 904 905 return 0; 906 } 907 908 static void 909 bdev_aio_fini(void) 910 { 911 spdk_io_device_unregister(&aio_if, NULL); 912 } 913 914 SPDK_LOG_REGISTER_COMPONENT(aio) 915