1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "bdev_aio.h" 35 36 #include "spdk/stdinc.h" 37 38 #include "spdk/barrier.h" 39 #include "spdk/bdev.h" 40 #include "spdk/bdev_module.h" 41 #include "spdk/env.h" 42 #include "spdk/fd.h" 43 #include "spdk/likely.h" 44 #include "spdk/thread.h" 45 #include "spdk/json.h" 46 #include "spdk/util.h" 47 #include "spdk/string.h" 48 49 #include "spdk/log.h" 50 51 #include <sys/eventfd.h> 52 #include <libaio.h> 53 54 struct bdev_aio_io_channel { 55 uint64_t io_inflight; 56 io_context_t io_ctx; 57 struct bdev_aio_group_channel *group_ch; 58 TAILQ_ENTRY(bdev_aio_io_channel) link; 59 }; 60 61 struct bdev_aio_group_channel { 62 int efd; 63 struct spdk_interrupt *intr; 64 struct spdk_poller *poller; 65 TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head; 66 }; 67 68 struct bdev_aio_task { 69 struct iocb iocb; 70 uint64_t len; 71 struct bdev_aio_io_channel *ch; 72 TAILQ_ENTRY(bdev_aio_task) link; 73 }; 74 75 struct file_disk { 76 struct bdev_aio_task *reset_task; 77 struct spdk_poller *reset_retry_timer; 78 struct spdk_bdev disk; 79 char *filename; 80 int fd; 81 TAILQ_ENTRY(file_disk) link; 82 bool block_size_override; 83 }; 84 85 /* For user space reaping of completions */ 86 struct spdk_aio_ring { 87 uint32_t id; 88 uint32_t size; 89 uint32_t head; 90 uint32_t tail; 91 92 uint32_t version; 93 uint32_t compat_features; 94 uint32_t incompat_features; 95 uint32_t header_length; 96 }; 97 98 #define SPDK_AIO_RING_VERSION 0xa10a10a1 99 100 static int bdev_aio_initialize(void); 101 static void bdev_aio_fini(void); 102 static void aio_free_disk(struct file_disk *fdisk); 103 static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head); 104 105 #define SPDK_AIO_QUEUE_DEPTH 128 106 #define MAX_EVENTS_PER_POLL 32 107 108 static int 109 bdev_aio_get_ctx_size(void) 110 { 111 return sizeof(struct bdev_aio_task); 112 } 113 114 static struct spdk_bdev_module aio_if = { 115 .name = "aio", 116 .module_init = bdev_aio_initialize, 117 .module_fini = bdev_aio_fini, 118 .get_ctx_size = bdev_aio_get_ctx_size, 119 }; 120 121 SPDK_BDEV_MODULE_REGISTER(aio, &aio_if) 122 123 static int 124 bdev_aio_open(struct file_disk *disk) 125 { 126 int fd; 127 128 fd = open(disk->filename, O_RDWR | O_DIRECT); 129 if (fd < 0) { 130 /* Try without O_DIRECT for non-disk files */ 131 fd = open(disk->filename, O_RDWR); 132 if (fd < 0) { 133 SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", 134 disk->filename, errno, spdk_strerror(errno)); 135 disk->fd = -1; 136 return -1; 137 } 138 } 139 140 disk->fd = fd; 141 142 return 0; 143 } 144 145 static int 146 bdev_aio_close(struct file_disk *disk) 147 { 148 int rc; 149 150 if (disk->fd == -1) { 151 return 0; 152 } 153 154 rc = close(disk->fd); 155 if (rc < 0) { 156 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", 157 disk->fd, errno, spdk_strerror(errno)); 158 return -1; 159 } 160 161 disk->fd = -1; 162 163 return 0; 164 } 165 166 static int64_t 167 bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch, 168 struct bdev_aio_task *aio_task, 169 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 170 { 171 struct iocb *iocb = &aio_task->iocb; 172 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 173 int rc; 174 175 io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset); 176 if (aio_ch->group_ch->efd) { 177 io_set_eventfd(iocb, aio_ch->group_ch->efd); 178 } 179 iocb->data = aio_task; 180 aio_task->len = nbytes; 181 aio_task->ch = aio_ch; 182 183 SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n", 184 iovcnt, nbytes, offset); 185 186 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 187 if (rc < 0) { 188 if (rc == -EAGAIN) { 189 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 190 } else { 191 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); 192 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 193 } 194 return -1; 195 } 196 aio_ch->io_inflight++; 197 return nbytes; 198 } 199 200 static int64_t 201 bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch, 202 struct bdev_aio_task *aio_task, 203 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 204 { 205 struct iocb *iocb = &aio_task->iocb; 206 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 207 int rc; 208 209 io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset); 210 if (aio_ch->group_ch->efd) { 211 io_set_eventfd(iocb, aio_ch->group_ch->efd); 212 } 213 iocb->data = aio_task; 214 aio_task->len = len; 215 aio_task->ch = aio_ch; 216 217 SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n", 218 iovcnt, len, offset); 219 220 rc = io_submit(aio_ch->io_ctx, 1, &iocb); 221 if (rc < 0) { 222 if (rc == -EAGAIN) { 223 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); 224 } else { 225 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); 226 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); 227 } 228 return -1; 229 } 230 aio_ch->io_inflight++; 231 return len; 232 } 233 234 static void 235 bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 236 { 237 int rc = fsync(fdisk->fd); 238 239 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), 240 rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED); 241 } 242 243 static int 244 bdev_aio_destruct(void *ctx) 245 { 246 struct file_disk *fdisk = ctx; 247 int rc = 0; 248 249 TAILQ_REMOVE(&g_aio_disk_head, fdisk, link); 250 rc = bdev_aio_close(fdisk); 251 if (rc < 0) { 252 SPDK_ERRLOG("bdev_aio_close() failed\n"); 253 } 254 spdk_io_device_unregister(fdisk, NULL); 255 aio_free_disk(fdisk); 256 return rc; 257 } 258 259 static int 260 bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents) 261 { 262 uint32_t head, tail, count; 263 struct spdk_aio_ring *ring; 264 struct timespec timeout; 265 struct io_event *kevents; 266 267 ring = (struct spdk_aio_ring *)io_ctx; 268 269 if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) { 270 timeout.tv_sec = 0; 271 timeout.tv_nsec = 0; 272 273 return io_getevents(io_ctx, 0, max, uevents, &timeout); 274 } 275 276 /* Read the current state out of the ring */ 277 head = ring->head; 278 tail = ring->tail; 279 280 /* This memory barrier is required to prevent the loads above 281 * from being re-ordered with stores to the events array 282 * potentially occurring on other threads. */ 283 spdk_smp_rmb(); 284 285 /* Calculate how many items are in the circular ring */ 286 count = tail - head; 287 if (tail < head) { 288 count += ring->size; 289 } 290 291 /* Reduce the count to the limit provided by the user */ 292 count = spdk_min(max, count); 293 294 /* Grab the memory location of the event array */ 295 kevents = (struct io_event *)((uintptr_t)ring + ring->header_length); 296 297 /* Copy the events out of the ring. */ 298 if ((head + count) <= ring->size) { 299 /* Only one copy is required */ 300 memcpy(uevents, &kevents[head], count * sizeof(struct io_event)); 301 } else { 302 uint32_t first_part = ring->size - head; 303 /* Two copies are required */ 304 memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event)); 305 memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event)); 306 } 307 308 /* Update the head pointer. On x86, stores will not be reordered with older loads, 309 * so the copies out of the event array will always be complete prior to this 310 * update becoming visible. On other architectures this is not guaranteed, so 311 * add a barrier. */ 312 #if defined(__i386__) || defined(__x86_64__) 313 spdk_compiler_barrier(); 314 #else 315 spdk_smp_mb(); 316 #endif 317 ring->head = (head + count) % ring->size; 318 319 return count; 320 } 321 322 static int 323 bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch) 324 { 325 int nr, i = 0; 326 enum spdk_bdev_io_status status; 327 struct bdev_aio_task *aio_task; 328 struct io_event events[SPDK_AIO_QUEUE_DEPTH]; 329 330 nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events); 331 332 if (nr < 0) { 333 return 0; 334 } 335 336 for (i = 0; i < nr; i++) { 337 aio_task = events[i].data; 338 if (events[i].res != aio_task->len) { 339 status = SPDK_BDEV_IO_STATUS_FAILED; 340 } else { 341 status = SPDK_BDEV_IO_STATUS_SUCCESS; 342 } 343 344 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status); 345 aio_task->ch->io_inflight--; 346 } 347 348 return nr; 349 } 350 351 static int 352 bdev_aio_group_poll(void *arg) 353 { 354 struct bdev_aio_group_channel *group_ch = arg; 355 struct bdev_aio_io_channel *io_ch; 356 int nr = 0; 357 358 TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) { 359 nr += bdev_aio_io_channel_poll(io_ch); 360 } 361 362 return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 363 } 364 365 static int 366 bdev_aio_group_interrupt(void *arg) 367 { 368 struct bdev_aio_group_channel *group_ch = arg; 369 int rc; 370 uint64_t num_events; 371 372 assert(group_ch->efd); 373 374 /* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH, 375 * io_getevent should be called again to ensure all completed IO are processed. 376 */ 377 rc = read(group_ch->efd, &num_events, sizeof(num_events)); 378 if (rc < 0) { 379 SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno)); 380 return -errno; 381 } 382 383 if (num_events > SPDK_AIO_QUEUE_DEPTH) { 384 num_events -= SPDK_AIO_QUEUE_DEPTH; 385 rc = write(group_ch->efd, &num_events, sizeof(num_events)); 386 if (rc < 0) { 387 SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno)); 388 } 389 } 390 391 return bdev_aio_group_poll(group_ch); 392 } 393 394 static void 395 _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i) 396 { 397 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 398 struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); 399 400 if (aio_ch->io_inflight) { 401 spdk_for_each_channel_continue(i, -1); 402 return; 403 } 404 405 spdk_for_each_channel_continue(i, 0); 406 } 407 408 static int bdev_aio_reset_retry_timer(void *arg); 409 410 static void 411 _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) 412 { 413 struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i); 414 415 if (status == -1) { 416 fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500); 417 return; 418 } 419 420 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); 421 } 422 423 static int 424 bdev_aio_reset_retry_timer(void *arg) 425 { 426 struct file_disk *fdisk = arg; 427 428 if (fdisk->reset_retry_timer) { 429 spdk_poller_unregister(&fdisk->reset_retry_timer); 430 } 431 432 spdk_for_each_channel(fdisk, 433 _bdev_aio_get_io_inflight, 434 fdisk, 435 _bdev_aio_get_io_inflight_done); 436 437 return SPDK_POLLER_BUSY; 438 } 439 440 static void 441 bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task) 442 { 443 fdisk->reset_task = aio_task; 444 445 bdev_aio_reset_retry_timer(fdisk); 446 } 447 448 static void 449 bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 450 bool success) 451 { 452 if (!success) { 453 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 454 return; 455 } 456 457 switch (bdev_io->type) { 458 case SPDK_BDEV_IO_TYPE_READ: 459 bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt, 460 ch, 461 (struct bdev_aio_task *)bdev_io->driver_ctx, 462 bdev_io->u.bdev.iovs, 463 bdev_io->u.bdev.iovcnt, 464 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 465 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 466 break; 467 case SPDK_BDEV_IO_TYPE_WRITE: 468 bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt, 469 ch, 470 (struct bdev_aio_task *)bdev_io->driver_ctx, 471 bdev_io->u.bdev.iovs, 472 bdev_io->u.bdev.iovcnt, 473 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 474 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 475 break; 476 default: 477 SPDK_ERRLOG("Wrong io type\n"); 478 break; 479 } 480 } 481 482 static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 483 { 484 switch (bdev_io->type) { 485 /* Read and write operations must be performed on buffers aligned to 486 * bdev->required_alignment. If user specified unaligned buffers, 487 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */ 488 case SPDK_BDEV_IO_TYPE_READ: 489 case SPDK_BDEV_IO_TYPE_WRITE: 490 spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb, 491 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 492 return 0; 493 case SPDK_BDEV_IO_TYPE_FLUSH: 494 bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt, 495 (struct bdev_aio_task *)bdev_io->driver_ctx); 496 return 0; 497 498 case SPDK_BDEV_IO_TYPE_RESET: 499 bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt, 500 (struct bdev_aio_task *)bdev_io->driver_ctx); 501 return 0; 502 default: 503 return -1; 504 } 505 } 506 507 static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 508 { 509 if (_bdev_aio_submit_request(ch, bdev_io) < 0) { 510 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 511 } 512 } 513 514 static bool 515 bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 516 { 517 switch (io_type) { 518 case SPDK_BDEV_IO_TYPE_READ: 519 case SPDK_BDEV_IO_TYPE_WRITE: 520 case SPDK_BDEV_IO_TYPE_FLUSH: 521 case SPDK_BDEV_IO_TYPE_RESET: 522 return true; 523 524 default: 525 return false; 526 } 527 } 528 529 static int 530 bdev_aio_create_cb(void *io_device, void *ctx_buf) 531 { 532 struct bdev_aio_io_channel *ch = ctx_buf; 533 534 if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { 535 SPDK_ERRLOG("async I/O context setup failure\n"); 536 return -1; 537 } 538 539 ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if)); 540 TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link); 541 542 return 0; 543 } 544 545 static void 546 bdev_aio_destroy_cb(void *io_device, void *ctx_buf) 547 { 548 struct bdev_aio_io_channel *ch = ctx_buf; 549 550 io_destroy(ch->io_ctx); 551 552 assert(ch->group_ch); 553 TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link); 554 555 spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); 556 } 557 558 static struct spdk_io_channel * 559 bdev_aio_get_io_channel(void *ctx) 560 { 561 struct file_disk *fdisk = ctx; 562 563 return spdk_get_io_channel(fdisk); 564 } 565 566 567 static int 568 bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 569 { 570 struct file_disk *fdisk = ctx; 571 572 spdk_json_write_named_object_begin(w, "aio"); 573 574 spdk_json_write_named_string(w, "filename", fdisk->filename); 575 576 spdk_json_write_object_end(w); 577 578 return 0; 579 } 580 581 static void 582 bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 583 { 584 struct file_disk *fdisk = bdev->ctxt; 585 586 spdk_json_write_object_begin(w); 587 588 spdk_json_write_named_string(w, "method", "bdev_aio_create"); 589 590 spdk_json_write_named_object_begin(w, "params"); 591 spdk_json_write_named_string(w, "name", bdev->name); 592 if (fdisk->block_size_override) { 593 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 594 } 595 spdk_json_write_named_string(w, "filename", fdisk->filename); 596 spdk_json_write_object_end(w); 597 598 spdk_json_write_object_end(w); 599 } 600 601 static const struct spdk_bdev_fn_table aio_fn_table = { 602 .destruct = bdev_aio_destruct, 603 .submit_request = bdev_aio_submit_request, 604 .io_type_supported = bdev_aio_io_type_supported, 605 .get_io_channel = bdev_aio_get_io_channel, 606 .dump_info_json = bdev_aio_dump_info_json, 607 .write_config_json = bdev_aio_write_json_config, 608 }; 609 610 static void aio_free_disk(struct file_disk *fdisk) 611 { 612 if (fdisk == NULL) { 613 return; 614 } 615 free(fdisk->filename); 616 free(fdisk->disk.name); 617 free(fdisk); 618 } 619 620 static int 621 bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch) 622 { 623 int efd; 624 625 efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 626 if (efd < 0) { 627 return -1; 628 } 629 630 ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch); 631 if (ch->intr == NULL) { 632 close(efd); 633 return -1; 634 } 635 ch->efd = efd; 636 637 return 0; 638 } 639 640 static void 641 bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch) 642 { 643 spdk_interrupt_unregister(&ch->intr); 644 close(ch->efd); 645 ch->efd = 0; 646 } 647 648 static int 649 bdev_aio_group_create_cb(void *io_device, void *ctx_buf) 650 { 651 struct bdev_aio_group_channel *ch = ctx_buf; 652 653 TAILQ_INIT(&ch->io_ch_head); 654 655 if (spdk_interrupt_mode_is_enabled()) { 656 return bdev_aio_register_interrupt(ch); 657 } 658 659 ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0); 660 661 return 0; 662 } 663 664 static void 665 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) 666 { 667 struct bdev_aio_group_channel *ch = ctx_buf; 668 669 if (!TAILQ_EMPTY(&ch->io_ch_head)) { 670 SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n"); 671 } 672 673 if (ch->intr) { 674 bdev_aio_unregister_interrupt(ch); 675 return; 676 } 677 678 spdk_poller_unregister(&ch->poller); 679 } 680 681 int 682 create_aio_bdev(const char *name, const char *filename, uint32_t block_size) 683 { 684 struct file_disk *fdisk; 685 uint32_t detected_block_size; 686 uint64_t disk_size; 687 int rc; 688 689 fdisk = calloc(1, sizeof(*fdisk)); 690 if (!fdisk) { 691 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n"); 692 return -ENOMEM; 693 } 694 695 fdisk->filename = strdup(filename); 696 if (!fdisk->filename) { 697 rc = -ENOMEM; 698 goto error_return; 699 } 700 701 if (bdev_aio_open(fdisk)) { 702 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno); 703 rc = -errno; 704 goto error_return; 705 } 706 707 disk_size = spdk_fd_get_size(fdisk->fd); 708 709 fdisk->disk.name = strdup(name); 710 if (!fdisk->disk.name) { 711 rc = -ENOMEM; 712 goto error_return; 713 } 714 fdisk->disk.product_name = "AIO disk"; 715 fdisk->disk.module = &aio_if; 716 717 fdisk->disk.write_cache = 1; 718 719 detected_block_size = spdk_fd_get_blocklen(fdisk->fd); 720 if (block_size == 0) { 721 /* User did not specify block size - use autodetected block size. */ 722 if (detected_block_size == 0) { 723 SPDK_ERRLOG("Block size could not be auto-detected\n"); 724 rc = -EINVAL; 725 goto error_return; 726 } 727 fdisk->block_size_override = false; 728 block_size = detected_block_size; 729 } else { 730 if (block_size < detected_block_size) { 731 SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " 732 "auto-detected block size %" PRIu32 "\n", 733 block_size, detected_block_size); 734 rc = -EINVAL; 735 goto error_return; 736 } else if (detected_block_size != 0 && block_size != detected_block_size) { 737 SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " 738 "auto-detected block size %" PRIu32 "\n", 739 block_size, detected_block_size); 740 } 741 fdisk->block_size_override = true; 742 } 743 744 if (block_size < 512) { 745 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); 746 rc = -EINVAL; 747 goto error_return; 748 } 749 750 if (!spdk_u32_is_pow2(block_size)) { 751 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); 752 rc = -EINVAL; 753 goto error_return; 754 } 755 756 fdisk->disk.blocklen = block_size; 757 if (fdisk->block_size_override && detected_block_size) { 758 fdisk->disk.required_alignment = spdk_u32log2(detected_block_size); 759 } else { 760 fdisk->disk.required_alignment = spdk_u32log2(block_size); 761 } 762 763 if (disk_size % fdisk->disk.blocklen != 0) { 764 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", 765 disk_size, fdisk->disk.blocklen); 766 rc = -EINVAL; 767 goto error_return; 768 } 769 770 fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen; 771 fdisk->disk.ctxt = fdisk; 772 773 fdisk->disk.fn_table = &aio_fn_table; 774 775 spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb, 776 sizeof(struct bdev_aio_io_channel), 777 fdisk->disk.name); 778 rc = spdk_bdev_register(&fdisk->disk); 779 if (rc) { 780 spdk_io_device_unregister(fdisk, NULL); 781 goto error_return; 782 } 783 784 TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link); 785 return 0; 786 787 error_return: 788 bdev_aio_close(fdisk); 789 aio_free_disk(fdisk); 790 return rc; 791 } 792 793 struct delete_aio_bdev_ctx { 794 delete_aio_bdev_complete cb_fn; 795 void *cb_arg; 796 }; 797 798 static void 799 aio_bdev_unregister_cb(void *arg, int bdeverrno) 800 { 801 struct delete_aio_bdev_ctx *ctx = arg; 802 803 ctx->cb_fn(ctx->cb_arg, bdeverrno); 804 free(ctx); 805 } 806 807 void 808 bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg) 809 { 810 struct delete_aio_bdev_ctx *ctx; 811 812 if (!bdev || bdev->module != &aio_if) { 813 cb_fn(cb_arg, -ENODEV); 814 return; 815 } 816 817 ctx = calloc(1, sizeof(*ctx)); 818 if (ctx == NULL) { 819 cb_fn(cb_arg, -ENOMEM); 820 return; 821 } 822 823 ctx->cb_fn = cb_fn; 824 ctx->cb_arg = cb_arg; 825 spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx); 826 } 827 828 static int 829 bdev_aio_initialize(void) 830 { 831 spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb, 832 sizeof(struct bdev_aio_group_channel), "aio_module"); 833 834 return 0; 835 } 836 837 static void 838 bdev_aio_fini(void) 839 { 840 spdk_io_device_unregister(&aio_if, NULL); 841 } 842 843 SPDK_LOG_REGISTER_COMPONENT(aio) 844