1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

#include <sys/eventfd.h>
#include <libaio.h>

/* Per-thread I/O channel: one kernel AIO context plus a count of
 * submitted-but-not-completed I/Os. Linked into the module-level
 * group channel so the group poller can reap it.
 */
struct bdev_aio_io_channel {
	uint64_t				io_inflight;
	io_context_t				io_ctx;
	struct bdev_aio_group_channel		*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};

/* Module-level (per-thread) group channel: owns the poller that sweeps
 * all attached io channels, and the interrupt resources when running
 * in interrupt mode.
 */
struct bdev_aio_group_channel {
	/* eventfd for io completion notification in interrupt mode.
	 * Negative value like '-1' indicates it is invalid or unused.
	 */
	int					efd;
	struct spdk_interrupt			*intr;
	struct spdk_poller			*poller;
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};

/* Per-I/O driver context, embedded in each spdk_bdev_io (see
 * bdev_aio_get_ctx_size). The iocb must stay first-class here so its
 * 'data' pointer can carry the task through the kernel and back.
 */
struct bdev_aio_task {
	struct iocb			iocb;
	uint64_t			len;	/* expected byte count, checked at completion */
	struct bdev_aio_io_channel	*ch;
	TAILQ_ENTRY(bdev_aio_task)	link;
};

/* One AIO-backed bdev: the spdk_bdev plus the backing file descriptor. */
struct file_disk {
	struct bdev_aio_task	*reset_task;		/* in-progress reset, if any */
	struct spdk_poller	*reset_retry_timer;	/* retry poller while I/O drains */
	struct spdk_bdev	disk;
	char			*filename;
	int			fd;			/* -1 when closed */
	TAILQ_ENTRY(file_disk)	link;
	bool			block_size_override;	/* user supplied block_size explicitly */
};

/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

#define SPDK_AIO_RING_VERSION	0xa10a10a1

static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

/* Tell the bdev layer how much driver context to reserve per I/O. */
static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}
/* Module descriptor: hooks module init/fini and per-I/O context sizing
 * into the bdev layer.
 */
static struct spdk_bdev_module aio_if = {
	.name		= "aio",
	.module_init	= bdev_aio_initialize,
	.module_fini	= bdev_aio_fini,
	.get_ctx_size	= bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)

/* Open the backing file, preferring O_DIRECT.
 * Returns 0 on success with disk->fd set; -1 on failure with
 * disk->fd set to -1.
 */
static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;

	fd = open(disk->filename, O_RDWR | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, O_RDWR);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

/* Close the backing file if it is open. Returns 0 on success (or if
 * already closed), -1 if close() fails (fd is left unchanged then).
 */
static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

/* Submit a vectored read through Linux AIO.
 * On success returns nbytes and increments the channel in-flight count;
 * on submit failure the bdev_io is completed immediately (NOMEM for
 * -EAGAIN, translated aio status otherwise) and -1 is returned.
 */
static int64_t
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		/* Interrupt mode: have the kernel signal the group eventfd
		 * when this iocb completes.
		 */
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			/* Queue full: let the bdev layer retry later. */
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return nbytes;
}

/* Submit a vectored write through Linux AIO.
 * Mirror of bdev_aio_readv: returns len on success, -1 after completing
 * the bdev_io on submit failure.
 */
static int64_t
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		/* Interrupt mode: completion will signal the group eventfd. */
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			/* Queue full: let the bdev layer retry later. */
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return len;
}

/* Flush: synchronous fsync() of the backing file; completes the
 * bdev_io inline with success or -errno.
 */
static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	if (rc == 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
	}
}

/* Final stage of destruct, run after the io_device is unregistered:
 * drop the disk from the global list, close the file and free it.
 */
static void
bdev_aio_destruct_cb(void *io_device)
{
	struct file_disk *fdisk = io_device;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}

	aio_free_disk(fdisk);
}

/* fn_table destruct entry: asynchronously unregister the io_device;
 * actual teardown happens in bdev_aio_destruct_cb.
 */
static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;

	spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);

	return 0;
}
/* Reap completions directly from the kernel's AIO completion ring in
 * user space, avoiding the io_getevents() syscall when the ring layout
 * is one we understand. Falls back to a zero-timeout io_getevents()
 * call on version/feature mismatch. Returns the number of events
 * copied into 'uevents'.
 */
static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	/* The io_context_t is an opaque pointer to the kernel-shared ring. */
	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}

/* Drain up to SPDK_AIO_QUEUE_DEPTH completions from one io channel and
 * complete the corresponding bdev_ios. Returns the number of events
 * processed (0 when the reap itself failed).
 */
static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i = 0;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
	uint64_t io_result;

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);

	if (nr < 0) {
		return 0;
	}

#define MAX_AIO_ERRNO 256
	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		aio_task->ch->io_inflight--;
		io_result = events[i].res;
		if (io_result == aio_task->len) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
		} else if (io_result < MAX_AIO_ERRNO) {
			/* Linux AIO will return its errno to io_event.res */
			int aio_errno = io_result;

			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -aio_errno);
		} else {
			/* Short transfer that is not a recognizable errno. */
			SPDK_ERRLOG("failed to complete aio: requested len is %lu, but completed len is %lu.\n",
				    aio_task->len, io_result);
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
		}
	}

	return nr;
}

/* Group poller: sweep every io channel attached to this group channel.
 * BUSY when any completions were processed, IDLE otherwise.
 */
static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

/* Interrupt-mode handler for the group eventfd: acknowledge the event
 * count, then poll. Returns the poll result, or -errno if the eventfd
 * read fails.
 */
static int
bdev_aio_group_interrupt(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	int rc;
	uint64_t num_events;

	assert(group_ch->efd >= 0);

	/* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH,
	 * io_getevent should be called again to ensure all completed IO are processed.
	 */
	rc = read(group_ch->efd, &num_events, sizeof(num_events));
	if (rc < 0) {
		SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
		return -errno;
	}

	if (num_events > SPDK_AIO_QUEUE_DEPTH) {
		/* Write the residual count back so the eventfd stays signaled
		 * and this handler fires again for the remaining completions.
		 */
		num_events -= SPDK_AIO_QUEUE_DEPTH;
		rc = write(group_ch->efd, &num_events, sizeof(num_events));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
		}
	}

	return bdev_aio_group_poll(group_ch);
}
/* Channel-iterator step for reset: report -1 while this channel still
 * has I/O in flight, 0 once it is drained.
 */
static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

/* End of the per-channel in-flight scan: if any channel reported
 * outstanding I/O (status == -1), retry in 500us; otherwise the reset
 * is complete.
 */
static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

/* Start (or restart, from the retry poller) the in-flight scan across
 * all channels of this disk.
 */
static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	/* When invoked from the retry poller, stop it before rescanning. */
	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

/* Reset: remember the reset task and wait for in-flight I/O to drain. */
static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}

/* spdk_bdev_io_get_buf callback: dispatch the read or write now that
 * an aligned data buffer is guaranteed.
 */
static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		/* Only READ/WRITE ever reach this callback. */
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

/* Route a bdev_io to the matching backend operation.
 * Returns 0 when dispatched, -1 for unsupported I/O types.
 */
static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}
/* fn_table submit entry: dispatch, failing unsupported types at once. */
static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

/* Advertise the I/O types this backend implements. */
static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

/* Per-thread io channel constructor: create a kernel AIO context and
 * attach this channel to the module's group channel for polling.
 */
static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

/* Per-thread io channel destructor: destroy the AIO context, detach
 * from the group channel and drop the group channel reference.
 */
static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

/* fn_table entry: hand out the per-thread channel for this disk. */
static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}


/* Dump driver-specific info ("aio": {"filename": ...}) for bdev queries. */
static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_object_end(w);

	return 0;
}

/* Emit the bdev_aio_create RPC that recreates this bdev from config. */
static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		/* Only persist block_size when the user set it explicitly. */
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct		= bdev_aio_destruct,
	.submit_request		= bdev_aio_submit_request,
	.io_type_supported	= bdev_aio_io_type_supported,
	.get_io_channel		= bdev_aio_get_io_channel,
	.dump_info_json		= bdev_aio_dump_info_json,
	.write_config_json	= bdev_aio_write_json_config,
};

/* Free a file_disk and its owned strings. Safe to call with NULL. */
static void aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}

/* Create an eventfd and register it with the SPDK interrupt framework
 * for this group channel. Returns 0 on success, -1 on failure (no
 * resources left allocated).
 */
static int
bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
{
	int efd;

	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (efd < 0) {
		return -1;
	}

	ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
	if (ch->intr == NULL) {
		close(efd);
		return -1;
	}
	/* Publish the fd only after registration succeeded. */
	ch->efd = efd;

	return 0;
}

/* Undo bdev_aio_register_interrupt: detach the handler, close the
 * eventfd and mark it invalid (-1).
 */
static void
bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
{
	spdk_interrupt_unregister(&ch->intr);
	close(ch->efd);
	ch->efd = -1;
}
*ch) 664 { 665 spdk_interrupt_unregister(&ch->intr); 666 close(ch->efd); 667 ch->efd = -1; 668 } 669 670 static void 671 bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode) 672 { 673 return; 674 } 675 676 static int 677 bdev_aio_group_create_cb(void *io_device, void *ctx_buf) 678 { 679 struct bdev_aio_group_channel *ch = ctx_buf; 680 int rc; 681 682 TAILQ_INIT(&ch->io_ch_head); 683 /* Initialize ch->efd to be invalid and unused. */ 684 ch->efd = -1; 685 if (spdk_interrupt_mode_is_enabled()) { 686 rc = bdev_aio_register_interrupt(ch); 687 if (rc < 0) { 688 SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n"); 689 return rc; 690 } 691 } 692 693 ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0); 694 spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL); 695 696 return 0; 697 } 698 699 static void 700 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) 701 { 702 struct bdev_aio_group_channel *ch = ctx_buf; 703 704 if (!TAILQ_EMPTY(&ch->io_ch_head)) { 705 SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n"); 706 } 707 708 spdk_poller_unregister(&ch->poller); 709 if (spdk_interrupt_mode_is_enabled()) { 710 bdev_aio_unregister_interrupt(ch); 711 } 712 } 713 714 int 715 create_aio_bdev(const char *name, const char *filename, uint32_t block_size) 716 { 717 struct file_disk *fdisk; 718 uint32_t detected_block_size; 719 uint64_t disk_size; 720 int rc; 721 722 fdisk = calloc(1, sizeof(*fdisk)); 723 if (!fdisk) { 724 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n"); 725 return -ENOMEM; 726 } 727 728 fdisk->filename = strdup(filename); 729 if (!fdisk->filename) { 730 rc = -ENOMEM; 731 goto error_return; 732 } 733 734 if (bdev_aio_open(fdisk)) { 735 SPDK_ERRLOG("Unable to open file %s. 
fd: %d errno: %d\n", filename, fdisk->fd, errno); 736 rc = -errno; 737 goto error_return; 738 } 739 740 disk_size = spdk_fd_get_size(fdisk->fd); 741 742 fdisk->disk.name = strdup(name); 743 if (!fdisk->disk.name) { 744 rc = -ENOMEM; 745 goto error_return; 746 } 747 fdisk->disk.product_name = "AIO disk"; 748 fdisk->disk.module = &aio_if; 749 750 fdisk->disk.write_cache = 1; 751 752 detected_block_size = spdk_fd_get_blocklen(fdisk->fd); 753 if (block_size == 0) { 754 /* User did not specify block size - use autodetected block size. */ 755 if (detected_block_size == 0) { 756 SPDK_ERRLOG("Block size could not be auto-detected\n"); 757 rc = -EINVAL; 758 goto error_return; 759 } 760 fdisk->block_size_override = false; 761 block_size = detected_block_size; 762 } else { 763 if (block_size < detected_block_size) { 764 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 765 "auto-detected block size %" PRIu32 "\n", 766 block_size, detected_block_size); 767 rc = -EINVAL; 768 goto error_return; 769 } else if (detected_block_size != 0 && block_size != detected_block_size) { 770 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 771 "auto-detected block size %" PRIu32 "\n", 772 block_size, detected_block_size); 773 } 774 fdisk->block_size_override = true; 775 } 776 777 if (block_size < 512) { 778 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 779 rc = -EINVAL; 780 goto error_return; 781 } 782 783 if (!spdk_u32_is_pow2(block_size)) { 784 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 785 rc = -EINVAL; 786 goto error_return; 787 } 788 789 fdisk->disk.blocklen = block_size; 790 if (fdisk->block_size_override && detected_block_size) { 791 fdisk->disk.required_alignment = spdk_u32log2(detected_block_size); 792 } else { 793 fdisk->disk.required_alignment = spdk_u32log2(block_size); 794 } 795 796 if (disk_size % fdisk->disk.blocklen != 0) { 797 SPDK_ERRLOG("Disk size %" PRIu64 " is not 
a multiple of block size %" PRIu32 "\n", 798 disk_size, fdisk->disk.blocklen); 799 rc = -EINVAL; 800 goto error_return; 801 } 802 803 fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen; 804 fdisk->disk.ctxt = fdisk; 805 806 fdisk->disk.fn_table = &aio_fn_table; 807 808 spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb, 809 sizeof(struct bdev_aio_io_channel), 810 fdisk->disk.name); 811 rc = spdk_bdev_register(&fdisk->disk); 812 if (rc) { 813 spdk_io_device_unregister(fdisk, NULL); 814 goto error_return; 815 } 816 817 TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link); 818 return 0; 819 820 error_return: 821 bdev_aio_close(fdisk); 822 aio_free_disk(fdisk); 823 return rc; 824 } 825 826 struct delete_aio_bdev_ctx { 827 delete_aio_bdev_complete cb_fn; 828 void *cb_arg; 829 }; 830 831 static void 832 aio_bdev_unregister_cb(void *arg, int bdeverrno) 833 { 834 struct delete_aio_bdev_ctx *ctx = arg; 835 836 ctx->cb_fn(ctx->cb_arg, bdeverrno); 837 free(ctx); 838 } 839 840 void 841 bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg) 842 { 843 struct delete_aio_bdev_ctx *ctx; 844 845 if (!bdev || bdev->module != &aio_if) { 846 cb_fn(cb_arg, -ENODEV); 847 return; 848 } 849 850 ctx = calloc(1, sizeof(*ctx)); 851 if (ctx == NULL) { 852 cb_fn(cb_arg, -ENOMEM); 853 return; 854 } 855 856 ctx->cb_fn = cb_fn; 857 ctx->cb_arg = cb_arg; 858 spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx); 859 } 860 861 static int 862 bdev_aio_initialize(void) 863 { 864 spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb, 865 sizeof(struct bdev_aio_group_channel), "aio_module"); 866 867 return 0; 868 } 869 870 static void 871 bdev_aio_fini(void) 872 { 873 spdk_io_device_unregister(&aio_if, NULL); 874 } 875 876 SPDK_LOG_REGISTER_COMPONENT(aio) 877