/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation.
 * All rights reserved.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

#include <sys/eventfd.h>
#include <libaio.h>

struct bdev_aio_io_channel {
	uint64_t			io_inflight;
	io_context_t			io_ctx;
	struct bdev_aio_group_channel	*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel) link;
};

struct bdev_aio_group_channel {
	/* eventfd for io completion notification in interrupt mode.
	 * Negative value like '-1' indicates it is invalid or unused.
	 */
	int				efd;
	struct spdk_interrupt		*intr;
	struct spdk_poller		*poller;
	TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head;
};

struct bdev_aio_task {
	struct iocb			iocb;
	uint64_t			len;
	struct bdev_aio_io_channel	*ch;
};

struct file_disk {
	struct bdev_aio_task	*reset_task;
	struct spdk_poller	*reset_retry_timer;
	struct spdk_bdev	disk;
	char			*filename;
	int			fd;
	TAILQ_ENTRY(file_disk)	link;
	bool			block_size_override;
	bool			readonly;
};

/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

#define SPDK_AIO_RING_VERSION	0xa10a10a1

static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}

static struct spdk_bdev_module aio_if = {
	.name		= "aio",
	.module_init	= bdev_aio_initialize,
	.module_fini	= bdev_aio_fini,
	.get_ctx_size	= bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)
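/* Open the backing file or block device. O_DIRECT is attempted first so that
 * I/O bypasses the page cache; if that open fails (e.g. for regular files on
 * filesystems without O_DIRECT support), the open is retried without the flag.
 */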
static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;
	int io_flag = disk->readonly ? O_RDONLY : O_RDWR;

	fd = open(disk->filename, io_flag | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, io_flag);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

static void
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

static void
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
}

static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	if (rc == 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
	}
}

static void
bdev_aio_destruct_cb(void *io_device)
{
	struct file_disk *fdisk = io_device;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}
	aio_free_disk(fdisk);
}
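/* Bdev destruct callback: unregister the per-disk io_device. The actual close
 * and free of the file_disk happen in bdev_aio_destruct_cb, which the SPDK
 * framework invokes once all channels on the io_device have been released.
 */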
static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;

	spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);

	return 0;
}

static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}
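/* Reap up to SPDK_AIO_QUEUE_DEPTH completions from this channel's io context
 * and complete the corresponding bdev I/Os. A completion is treated as
 * successful only when io_event.res matches the submitted transfer length.
 */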
static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i, res = 0;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		aio_task->ch->io_inflight--;
		if (events[i].res == aio_task->len) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			/* In aio_abi.h, io_event.res is defined as __s64, and a negative
			 * errno is assigned to io_event.res on error.
			 * But libaio.h defines io_event.res as unsigned long, so convert
			 * it to a signed value for error detection.
			 */
			SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
			res = (int)events[i].res;
			if (res < 0) {
				spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
			} else {
				spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			}
		}
	}

	return nr;
}

static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_aio_group_interrupt(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	int rc;
	uint64_t num_events;

	assert(group_ch->efd >= 0);

	/* If the number of completed I/Os is larger than SPDK_AIO_QUEUE_DEPTH,
	 * io_getevents should be called again to ensure all completed I/Os are processed.
	 */
	rc = read(group_ch->efd, &num_events, sizeof(num_events));
	if (rc < 0) {
		SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
		return -errno;
	}

	if (num_events > SPDK_AIO_QUEUE_DEPTH) {
		num_events -= SPDK_AIO_QUEUE_DEPTH;
		rc = write(group_ch->efd, &num_events, sizeof(num_events));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
		}
	}

	return bdev_aio_group_poll(group_ch);
}

static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}
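/* Invoked by the bdev layer once a data buffer aligned to
 * bdev->required_alignment is available; dispatches the read or write to the
 * preadv/pwritev submission paths above.
 */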
static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

static int
_bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt;

	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (fdisk->readonly) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
					     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		}
		return 0;

	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}

static void
bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}
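/* Emit the driver-specific JSON info for this bdev: the backing filename plus
 * the block_size_override and readonly flags (surfaced, e.g., by bdev listing
 * RPCs).
 */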
static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_named_bool(w, "block_size_override", fdisk->block_size_override);

	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct		= bdev_aio_destruct,
	.submit_request		= bdev_aio_submit_request,
	.io_type_supported	= bdev_aio_io_type_supported,
	.get_io_channel		= bdev_aio_get_io_channel,
	.dump_info_json		= bdev_aio_dump_info_json,
	.write_config_json	= bdev_aio_write_json_config,
};

static void
aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}

static int
bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
{
	int efd;

	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (efd < 0) {
		return -1;
	}

	ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
	if (ch->intr == NULL) {
		close(efd);
		return -1;
	}
	ch->efd = efd;

	return 0;
}

static void
bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
{
	spdk_interrupt_unregister(&ch->intr);
	close(ch->efd);
	ch->efd = -1;
}

static void
bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
{
	/* Nothing to do: the group poller is registered (and runs) in both poll
	 * and interrupt mode, so no state needs to change here.
	 */
	return;
}
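/* Per-thread group channel setup. The completion poller is always registered;
 * in interrupt mode an eventfd is additionally wired up so that kernel AIO
 * completions wake the thread instead of relying on polling alone.
 */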
static int
bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;
	int rc;

	TAILQ_INIT(&ch->io_ch_head);
	/* Initialize ch->efd to be invalid and unused. */
	ch->efd = -1;
	if (spdk_interrupt_mode_is_enabled()) {
		rc = bdev_aio_register_interrupt(ch);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to prepare interrupt resources for bdev_aio\n");
			return rc;
		}
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL);

	return 0;
}

static void
bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("Group channel of bdev_aio still has unreleased I/O channels\n");
	}

	spdk_poller_unregister(&ch->poller);
	if (spdk_interrupt_mode_is_enabled()) {
		bdev_aio_unregister_interrupt(ch);
	}
}
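/* Create an aio bdev backed by a file or block device. A block_size of 0
 * means "use the auto-detected block size"; otherwise the value must be a
 * power of two, at least 512, and no smaller than the detected block size.
 * Returns 0 on success or a negative errno value.
 *
 * Example JSON-RPC invocation (a sketch; the method and parameter names are
 * taken from bdev_aio_write_json_config above):
 *   {"method": "bdev_aio_create",
 *    "params": {"name": "aio0", "filename": "/dev/sdb", "block_size": 4096}}
 */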
int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}
	fdisk->readonly = readonly;

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
		rc = -ENOMEM;
		goto error_return;
	}

	if (bdev_aio_open(fdisk)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
		rc = -errno;
		goto error_return;
	}

	disk_size = spdk_fd_get_size(fdisk->fd);

	fdisk->disk.name = strdup(name);
	if (!fdisk->disk.name) {
		rc = -ENOMEM;
		goto error_return;
	}
	fdisk->disk.product_name = "AIO disk";
	fdisk->disk.module = &aio_if;

	fdisk->disk.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			rc = -EINVAL;
			goto error_return;
		}
		fdisk->block_size_override = false;
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			rc = -EINVAL;
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
		fdisk->block_size_override = true;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blocklen = block_size;
	if (fdisk->block_size_override && detected_block_size) {
		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
	} else {
		fdisk->disk.required_alignment = spdk_u32log2(block_size);
	}

	if (disk_size % fdisk->disk.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    disk_size, fdisk->disk.blocklen);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
	fdisk->disk.ctxt = fdisk;

	fdisk->disk.fn_table = &aio_fn_table;

	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
				sizeof(struct bdev_aio_io_channel),
				fdisk->disk.name);
	rc = spdk_bdev_register(&fdisk->disk);
	if (rc) {
		spdk_io_device_unregister(fdisk, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
	return 0;

error_return:
	bdev_aio_close(fdisk);
	aio_free_disk(fdisk);
	return rc;
}

static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

int
bdev_aio_rescan(const char *name)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct file_disk *fdisk;
	uint64_t disk_size, blockcnt;
	int rc;

	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);
	if (bdev->module != &aio_if) {
		rc = -ENODEV;
		goto exit;
	}

	fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk);
	disk_size = spdk_fd_get_size(fdisk->fd);
	blockcnt = disk_size / bdev->blocklen;

	if (bdev->blockcnt != blockcnt) {
		SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64
			       ", new block count %" PRIu64 "\n",
			       fdisk->filename,
			       bdev->blockcnt,
			       blockcnt);
		rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
		if (rc != 0) {
			SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n",
				    fdisk->filename, rc);
			goto exit;
		}
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}

struct delete_aio_bdev_ctx {
	delete_aio_bdev_complete	cb_fn;
	void				*cb_arg;
};
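/* Completion trampoline for bdev_aio_delete(): forward the unregister status
 * to the caller's callback and release the heap-allocated context.
 */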
static void
aio_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_aio_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg)
{
	struct delete_aio_bdev_ctx *ctx;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx);
	if (rc != 0) {
		aio_bdev_unregister_cb(ctx, rc);
	}
}

static int
bdev_aio_initialize(void)
{
	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
				sizeof(struct bdev_aio_group_channel), "aio_module");

	return 0;
}

static void
bdev_aio_fini(void)
{
	spdk_io_device_unregister(&aio_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(aio)