1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "bdev_aio.h" 35 36 #include "spdk/stdinc.h" 37 38 #include "spdk/barrier.h" 39 #include "spdk/bdev.h" 40 #include "spdk/bdev_module.h" 41 #include "spdk/env.h" 42 #include "spdk/fd.h" 43 #include "spdk/likely.h" 44 #include "spdk/thread.h" 45 #include "spdk/json.h" 46 #include "spdk/util.h" 47 #include "spdk/string.h" 48 49 #include "spdk/log.h" 50 51 #include <sys/eventfd.h> 52 #include <libaio.h> 53 54 struct bdev_aio_io_channel { 55 uint64_t io_inflight; 56 io_context_t io_ctx; 57 struct bdev_aio_group_channel *group_ch; 58 TAILQ_ENTRY(bdev_aio_io_channel) link; 59 }; 60 61 struct bdev_aio_group_channel { 62 /* eventfd for io completion notification in interrupt mode. 63 * Negative value like '-1' indicates it is invalid or unused. 64 */ 65 int efd; 66 struct spdk_interrupt *intr; 67 struct spdk_poller *poller; 68 TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head; 69 }; 70 71 struct bdev_aio_task { 72 struct iocb iocb; 73 uint64_t len; 74 struct bdev_aio_io_channel *ch; 75 }; 76 77 struct file_disk { 78 struct bdev_aio_task *reset_task; 79 struct spdk_poller *reset_retry_timer; 80 struct spdk_bdev disk; 81 char *filename; 82 int fd; 83 TAILQ_ENTRY(file_disk) link; 84 bool block_size_override; 85 }; 86 87 /* For user space reaping of completions */ 88 struct spdk_aio_ring { 89 uint32_t id; 90 uint32_t size; 91 uint32_t head; 92 uint32_t tail; 93 94 uint32_t version; 95 uint32_t compat_features; 96 uint32_t incompat_features; 97 uint32_t header_length; 98 }; 99 100 #define SPDK_AIO_RING_VERSION 0xa10a10a1 101 102 static int bdev_aio_initialize(void); 103 static void bdev_aio_fini(void); 104 static void aio_free_disk(struct file_disk *fdisk); 105 static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head); 106 107 #define SPDK_AIO_QUEUE_DEPTH 128 108 #define MAX_EVENTS_PER_POLL 32 109 110 static int 111 bdev_aio_get_ctx_size(void) 112 { 113 return sizeof(struct bdev_aio_task); 114 } 115 116 static struct spdk_bdev_module aio_if = { 117 .name = "aio", 118 .module_init = bdev_aio_initialize, 119 .module_fini = bdev_aio_fini, 120 .get_ctx_size = bdev_aio_get_ctx_size, 121 }; 122 123 SPDK_BDEV_MODULE_REGISTER(aio, &aio_if) 124 125 static int 126 bdev_aio_open(struct file_disk *disk) 127 { 128 int fd; 129 130 fd = open(disk->filename, O_RDWR | O_DIRECT); 131 if (fd < 0) { 132 /* Try without O_DIRECT for non-disk files */ 133 fd = open(disk->filename, O_RDWR); 134 if (fd < 0) { 135 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 136 disk->filename, errno, spdk_strerror(errno)); 137 disk->fd = -1; 138 return -1; 139 } 140 } 141 142 disk->fd = fd; 143 144 return 0; 145 } 146 147 static int 148 bdev_aio_close(struct file_disk *disk) 149 { 150 int rc; 151 152 if (disk->fd == -1) { 153 return 0; 154 } 155 156 rc = close(disk->fd); 157 if (rc < 0) { 158 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 159 disk->fd, errno, spdk_strerror(errno)); 160 return -1; 161 } 162 163 disk->fd = -1; 164 165 return 0; 166 } 167 168 static int64_t 169 bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch, 170 struct bdev_aio_task *aio_task, 171 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 172 { 173 struct iocb *iocb = &aio_task->iocb; 174 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 175 int rc; 176 177 io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset); 178 if (aio_ch->group_ch->efd >= 0) { 179 io_set_eventfd(iocb, aio_ch->group_ch->efd); 180 } 181 iocb->data = aio_task; 182 aio_task->len = nbytes; 183 aio_task->ch = aio_ch; 184 185 SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n", 186 iovcnt, nbytes, offset); 187 188 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 189 if (rc < 0) { 190 if (rc == -EAGAIN) { 191 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 192 } else { 193 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc); 194 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 195 } 196 return -1; 197 } 198 aio_ch->io_inflight++; 199 return nbytes; 200 } 201 202 static int64_t 203 bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch, 204 struct bdev_aio_task *aio_task, 205 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 206 { 207 struct iocb *iocb = &aio_task->iocb; 208 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 209 int rc; 210 211 io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset); 212 if (aio_ch->group_ch->efd >= 0) { 213 io_set_eventfd(iocb, aio_ch->group_ch->efd); 214 } 215 iocb->data = aio_task; 216 aio_task->len = len; 217 aio_task->ch = aio_ch; 218 219 SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n", 220 iovcnt, len, offset); 221 222 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 223 if (rc < 0) { 224 if (rc == -EAGAIN) { 225 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 226 } else { 227 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc); 228 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 229 } 230 return -1; 231 } 232 aio_ch->io_inflight++; 233 return len; 234 } 235 236 static void 237 bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 238 { 239 int rc = fsync(fdisk->fd); 240 241 if (rc == 0) { 242 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS); 243 } else { 244 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno); 245 } 246 } 247 248 static void 249 bdev_aio_destruct_cb(void *io_device) 250 { 251 struct file_disk *fdisk = io_device; 252 int rc = 0; 253 254 TAILQ_REMOVE(&g_aio_disk_head, fdisk, link); 255 rc = bdev_aio_close(fdisk); 256 if (rc < 0) { 257 SPDK_ERRLOG("bdev_aio_close() failed\n"); 258 } 259 260 aio_free_disk(fdisk); 261 } 262 263 static int 264 bdev_aio_destruct(void *ctx) 265 { 266 struct file_disk *fdisk = ctx; 267 268 spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb); 269 270 return 0; 271 } 272 273 static int 274 bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents) 275 { 276 uint32_t head, tail, count; 277 struct spdk_aio_ring *ring; 278 struct timespec timeout; 279 struct io_event *kevents; 280 281 ring = (struct spdk_aio_ring *)io_ctx; 282 283 if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) { 284 timeout.tv_sec = 0; 285 timeout.tv_nsec = 0; 286 287 return io_getevents(io_ctx, 0, max, uevents, &timeout); 288 } 289 290 /* Read the current state out of the ring */ 291 head = ring->head; 292 tail = ring->tail; 293 294 /* This memory barrier is required to prevent the loads above 295 * from being re-ordered with stores to the events array 296 * potentially occurring on other threads. */ 297 spdk_smp_rmb(); 298 299 /* Calculate how many items are in the circular ring */ 300 count = tail - head; 301 if (tail < head) { 302 count += ring->size; 303 } 304 305 /* Reduce the count to the limit provided by the user */ 306 count = spdk_min(max, count); 307 308 /* Grab the memory location of the event array */ 309 kevents = (struct io_event *)((uintptr_t)ring + ring->header_length); 310 311 /* Copy the events out of the ring. */ 312 if ((head + count) <= ring->size) { 313 /* Only one copy is required */ 314 memcpy(uevents, &kevents[head], count * sizeof(struct io_event)); 315 } else { 316 uint32_t first_part = ring->size - head; 317 /* Two copies are required */ 318 memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event)); 319 memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event)); 320 } 321 322 /* Update the head pointer. On x86, stores will not be reordered with older loads, 323 * so the copies out of the event array will always be complete prior to this 324 * update becoming visible. On other architectures this is not guaranteed, so 325 * add a barrier. */ 326 #if defined(__i386__) || defined(__x86_64__) 327 spdk_compiler_barrier(); 328 #else 329 spdk_smp_mb(); 330 #endif 331 ring->head = (head + count) % ring->size; 332 333 return count; 334 } 335 336 static int 337 bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch) 338 { 339 int nr, i = 0; 340 struct bdev_aio_task *aio_task; 341 struct io_event events[SPDK_AIO_QUEUE_DEPTH]; 342 uint64_t io_result; 343 344 nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events); 345 346 if (nr < 0) { 347 return 0; 348 } 349 350 #define MAX_AIO_ERRNO 256 351 for (i = 0; i < nr; i++) { 352 aio_task = events[i].data; 353 aio_task->ch->io_inflight--; 354 io_result = events[i].res; 355 if (io_result == aio_task->len) { 356 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS); 357 } else if (io_result < MAX_AIO_ERRNO) { 358 /* Linux AIO will return its errno to io_event.res */ 359 int aio_errno = io_result; 360 361 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -aio_errno); 362 } else { 363 SPDK_ERRLOG("failed to complete aio: requested len is %lu, but completed len is %lu.\n", 364 aio_task->len, io_result); 365 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); 366 } 367 } 368 369 return nr; 370 } 371 372 static int 373 bdev_aio_group_poll(void *arg) 374 { 375 struct bdev_aio_group_channel *group_ch = arg; 376 struct bdev_aio_io_channel *io_ch; 377 int nr = 0; 378 379 TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) { 380 nr += bdev_aio_io_channel_poll(io_ch); 381 } 382 383 return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 384 } 385 386 static int 387 bdev_aio_group_interrupt(void *arg) 388 { 389 struct bdev_aio_group_channel *group_ch = arg; 390 int rc; 391 uint64_t num_events; 392 393 assert(group_ch->efd >= 0); 394 395 /* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH, 396 * io_getevent should be called again to ensure all completed IO are processed. 397 */ 398 rc = read(group_ch->efd, &num_events, sizeof(num_events)); 399 if (rc < 0) { 400 SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno)); 401 return -errno; 402 } 403 404 if (num_events > SPDK_AIO_QUEUE_DEPTH) { 405 num_events -= SPDK_AIO_QUEUE_DEPTH; 406 rc = write(group_ch->efd, &num_events, sizeof(num_events)); 407 if (rc < 0) { 408 SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno)); 409 } 410 } 411 412 return bdev_aio_group_poll(group_ch); 413 } 414 415 static void 416 _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i) 417 { 418 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 419 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 420 421 if (aio_ch->io_inflight) { 422 spdk_for_each_channel_continue(i, -1); 423 return; 424 } 425 426 spdk_for_each_channel_continue(i, 0); 427 } 428 429 static int bdev_aio_reset_retry_timer(void *arg); 430 431 static void 432 _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) 433 { 434 struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i); 435 436 if (status == -1) { 437 fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500); 438 return; 439 } 440 441 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); 442 } 443 444 static int 445 bdev_aio_reset_retry_timer(void *arg) 446 { 447 struct file_disk *fdisk = arg; 448 449 if (fdisk->reset_retry_timer) { 450 spdk_poller_unregister(&fdisk->reset_retry_timer); 451 } 452 453 spdk_for_each_channel(fdisk, 454 _bdev_aio_get_io_inflight, 455 fdisk, 456 _bdev_aio_get_io_inflight_done); 457 458 return SPDK_POLLER_BUSY; 459 } 460 461 static void 462 bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 463 { 464 fdisk->reset_task = aio_task; 465 466 bdev_aio_reset_retry_timer(fdisk); 467 } 468 469 static void 470 bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 471 bool success) 472 { 473 if (!success) { 474 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 475 return; 476 } 477 478 switch (bdev_io->type) { 479 case SPDK_BDEV_IO_TYPE_READ: 480 bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt, 481 ch, 482 (struct bdev_aio_task *)bdev_io->driver_ctx, 483 bdev_io->u.bdev.iovs, 484 bdev_io->u.bdev.iovcnt, 485 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 486 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 487 break; 488 case SPDK_BDEV_IO_TYPE_WRITE: 489 bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt, 490 ch, 491 (struct bdev_aio_task *)bdev_io->driver_ctx, 492 bdev_io->u.bdev.iovs, 493 bdev_io->u.bdev.iovcnt, 494 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 495 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 496 break; 497 default: 498 SPDK_ERRLOG("Wrong io type\n"); 499 break; 500 } 501 } 502 503 static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 504 { 505 switch (bdev_io->type) { 506 /* Read and write operations must be performed on buffers aligned to 507 * bdev->required_alignment. If user specified unaligned buffers, 508 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 509 case SPDK_BDEV_IO_TYPE_READ: 510 case SPDK_BDEV_IO_TYPE_WRITE: 511 spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb, 512 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 513 return 0; 514 case SPDK_BDEV_IO_TYPE_FLUSH: 515 bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt, 516 (struct bdev_aio_task *)bdev_io->driver_ctx); 517 return 0; 518 519 case SPDK_BDEV_IO_TYPE_RESET: 520 bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt, 521 (struct bdev_aio_task *)bdev_io->driver_ctx); 522 return 0; 523 default: 524 return -1; 525 } 526 } 527 528 static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 529 { 530 if (_bdev_aio_submit_request(ch, bdev_io) < 0) { 531 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 532 } 533 } 534 535 static bool 536 bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 537 { 538 switch (io_type) { 539 case SPDK_BDEV_IO_TYPE_READ: 540 case SPDK_BDEV_IO_TYPE_WRITE: 541 case SPDK_BDEV_IO_TYPE_FLUSH: 542 case SPDK_BDEV_IO_TYPE_RESET: 543 return true; 544 545 default: 546 return false; 547 } 548 } 549 550 static int 551 bdev_aio_create_cb(void *io_device, void *ctx_buf) 552 { 553 struct bdev_aio_io_channel *ch = ctx_buf; 554 555 if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { 556 SPDK_ERRLOG("async I/O context setup failure\n"); 557 return -1; 558 } 559 560 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if)); 561 TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link); 562 563 return 0; 564 } 565 566 static void 567 bdev_aio_destroy_cb(void *io_device, void *ctx_buf) 568 { 569 struct bdev_aio_io_channel *ch = ctx_buf; 570 571 io_destroy(ch->io_ctx); 572 573 assert(ch->group_ch); 574 TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link); 575 576 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 577 } 578 579 static struct spdk_io_channel * 580 bdev_aio_get_io_channel(void *ctx) 581 { 582 struct file_disk *fdisk = ctx; 583 584 return spdk_get_io_channel(fdisk); 585 } 586 587 588 static int 589 bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 590 { 591 struct file_disk *fdisk = ctx; 592 593 spdk_json_write_named_object_begin(w, "aio"); 594 595 spdk_json_write_named_string(w, "filename", fdisk->filename); 596 597 spdk_json_write_object_end(w); 598 599 return 0; 600 } 601 602 static void 603 bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 604 { 605 struct file_disk *fdisk = bdev->ctxt; 606 607 spdk_json_write_object_begin(w); 608 609 spdk_json_write_named_string(w, "method", "bdev_aio_create"); 610 611 spdk_json_write_named_object_begin(w, "params"); 612 spdk_json_write_named_string(w, "name", bdev->name); 613 if (fdisk->block_size_override) { 614 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 615 } 616 spdk_json_write_named_string(w, "filename", fdisk->filename); 617 spdk_json_write_object_end(w); 618 619 spdk_json_write_object_end(w); 620 } 621 622 static const struct spdk_bdev_fn_table aio_fn_table = { 623 .destruct = bdev_aio_destruct, 624 .submit_request = bdev_aio_submit_request, 625 .io_type_supported = bdev_aio_io_type_supported, 626 .get_io_channel = bdev_aio_get_io_channel, 627 .dump_info_json = bdev_aio_dump_info_json, 628 .write_config_json = bdev_aio_write_json_config, 629 }; 630 631 static void aio_free_disk(struct file_disk *fdisk) 632 { 633 if (fdisk == NULL) { 634 return; 635 } 636 free(fdisk->filename); 637 free(fdisk->disk.name); 638 free(fdisk); 639 } 640 641 static int 642 bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch) 643 { 644 int efd; 645 646 efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 647 if (efd < 0) { 648 return -1; 649 } 650 651 ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch); 652 if (ch->intr == NULL) { 653 close(efd); 654 return -1; 655 } 656 ch->efd = efd; 657 658 return 0; 659 } 660 661 static void 662 bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch) 663 { 664 spdk_interrupt_unregister(&ch->intr); 665 close(ch->efd); 666 ch->efd = -1; 667 } 668 669 static void 670 bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode) 671 { 672 return; 673 } 674 675 static int 676 bdev_aio_group_create_cb(void *io_device, void *ctx_buf) 677 { 678 struct bdev_aio_group_channel *ch = ctx_buf; 679 int rc; 680 681 TAILQ_INIT(&ch->io_ch_head); 682 /* Initialize ch->efd to be invalid and unused. */ 683 ch->efd = -1; 684 if (spdk_interrupt_mode_is_enabled()) { 685 rc = bdev_aio_register_interrupt(ch); 686 if (rc < 0) { 687 SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n"); 688 return rc; 689 } 690 } 691 692 ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0); 693 spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL); 694 695 return 0; 696 } 697 698 static void 699 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) 700 { 701 struct bdev_aio_group_channel *ch = ctx_buf; 702 703 if (!TAILQ_EMPTY(&ch->io_ch_head)) { 704 SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n"); 705 } 706 707 spdk_poller_unregister(&ch->poller); 708 if (spdk_interrupt_mode_is_enabled()) { 709 bdev_aio_unregister_interrupt(ch); 710 } 711 } 712 713 int 714 create_aio_bdev(const char *name, const char *filename, uint32_t block_size) 715 { 716 struct file_disk *fdisk; 717 uint32_t detected_block_size; 718 uint64_t disk_size; 719 int rc; 720 721 fdisk = calloc(1, sizeof(*fdisk)); 722 if (!fdisk) { 723 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n"); 724 return -ENOMEM; 725 } 726 727 fdisk->filename = strdup(filename); 728 if (!fdisk->filename) { 729 rc = -ENOMEM; 730 goto error_return; 731 } 732 733 if (bdev_aio_open(fdisk)) { 734 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno); 735 rc = -errno; 736 goto error_return; 737 } 738 739 disk_size = spdk_fd_get_size(fdisk->fd); 740 741 fdisk->disk.name = strdup(name); 742 if (!fdisk->disk.name) { 743 rc = -ENOMEM; 744 goto error_return; 745 } 746 fdisk->disk.product_name = "AIO disk"; 747 fdisk->disk.module = &aio_if; 748 749 fdisk->disk.write_cache = 1; 750 751 detected_block_size = spdk_fd_get_blocklen(fdisk->fd); 752 if (block_size == 0) { 753 /* User did not specify block size - use autodetected block size. */ 754 if (detected_block_size == 0) { 755 SPDK_ERRLOG("Block size could not be auto-detected\n"); 756 rc = -EINVAL; 757 goto error_return; 758 } 759 fdisk->block_size_override = false; 760 block_size = detected_block_size; 761 } else { 762 if (block_size < detected_block_size) { 763 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 764 "auto-detected block size %" PRIu32 "\n", 765 block_size, detected_block_size); 766 rc = -EINVAL; 767 goto error_return; 768 } else if (detected_block_size != 0 && block_size != detected_block_size) { 769 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 770 "auto-detected block size %" PRIu32 "\n", 771 block_size, detected_block_size); 772 } 773 fdisk->block_size_override = true; 774 } 775 776 if (block_size < 512) { 777 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 778 rc = -EINVAL; 779 goto error_return; 780 } 781 782 if (!spdk_u32_is_pow2(block_size)) { 783 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 784 rc = -EINVAL; 785 goto error_return; 786 } 787 788 fdisk->disk.blocklen = block_size; 789 if (fdisk->block_size_override && detected_block_size) { 790 fdisk->disk.required_alignment = spdk_u32log2(detected_block_size); 791 } else { 792 fdisk->disk.required_alignment = spdk_u32log2(block_size); 793 } 794 795 if (disk_size % fdisk->disk.blocklen != 0) { 796 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 797 disk_size, fdisk->disk.blocklen); 798 rc = -EINVAL; 799 goto error_return; 800 } 801 802 fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen; 803 fdisk->disk.ctxt = fdisk; 804 805 fdisk->disk.fn_table = &aio_fn_table; 806 807 spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb, 808 sizeof(struct bdev_aio_io_channel), 809 fdisk->disk.name); 810 rc = spdk_bdev_register(&fdisk->disk); 811 if (rc) { 812 spdk_io_device_unregister(fdisk, NULL); 813 goto error_return; 814 } 815 816 TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link); 817 return 0; 818 819 error_return: 820 bdev_aio_close(fdisk); 821 aio_free_disk(fdisk); 822 return rc; 823 } 824 825 struct delete_aio_bdev_ctx { 826 delete_aio_bdev_complete cb_fn; 827 void *cb_arg; 828 }; 829 830 static void 831 aio_bdev_unregister_cb(void *arg, int bdeverrno) 832 { 833 struct delete_aio_bdev_ctx *ctx = arg; 834 835 ctx->cb_fn(ctx->cb_arg, bdeverrno); 836 free(ctx); 837 } 838 839 void 840 bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg) 841 { 842 struct delete_aio_bdev_ctx *ctx; 843 844 if (!bdev || bdev->module != &aio_if) { 845 cb_fn(cb_arg, -ENODEV); 846 return; 847 } 848 849 ctx = calloc(1, sizeof(*ctx)); 850 if (ctx == NULL) { 851 cb_fn(cb_arg, -ENOMEM); 852 return; 853 } 854 855 ctx->cb_fn = cb_fn; 856 ctx->cb_arg = cb_arg; 857 spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx); 858 } 859 860 static int 861 bdev_aio_initialize(void) 862 { 863 spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb, 864 sizeof(struct bdev_aio_group_channel), "aio_module"); 865 866 return 0; 867 } 868 869 static void 870 bdev_aio_fini(void) 871 { 872 spdk_io_device_unregister(&aio_if, NULL); 873 } 874 875 SPDK_LOG_REGISTER_COMPONENT(aio) 876