/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/conf.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk_internal/log.h"

#include <libaio.h>

struct bdev_aio_io_channel {
	uint64_t io_inflight;
	io_context_t io_ctx;
	struct bdev_aio_group_channel *group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel) link;
};

struct bdev_aio_group_channel {
	struct spdk_poller *poller;
	TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head;
};

struct bdev_aio_task {
	struct iocb iocb;
	uint64_t len;
	struct bdev_aio_io_channel *ch;
	TAILQ_ENTRY(bdev_aio_task) link;
};

struct file_disk {
	struct bdev_aio_task *reset_task;
	struct spdk_poller *reset_retry_timer;
	struct spdk_bdev disk;
	char *filename;
	int fd;
	TAILQ_ENTRY(file_disk) link;
	bool block_size_override;
};

/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

#define SPDK_AIO_RING_VERSION	0xa10a10a1

static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static void bdev_aio_get_spdk_running_config(FILE *fp);
static TAILQ_HEAD(, file_disk) g_aio_disk_head;

#define SPDK_AIO_QUEUE_DEPTH	128
#define MAX_EVENTS_PER_POLL	32

static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}
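/*
 * Module interface handed to the bdev layer: init/fini hooks, the running
 * config text dump, and the per-I/O context size (one struct bdev_aio_task is
 * reserved in the driver_ctx of every bdev_io submitted to this module).
 */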
static struct spdk_bdev_module aio_if = {
	.name = "aio",
	.module_init = bdev_aio_initialize,
	.module_fini = bdev_aio_fini,
	.config_text = bdev_aio_get_spdk_running_config,
	.get_ctx_size = bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)

static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;

	fd = open(disk->filename, O_RDWR | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, O_RDWR);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

static int64_t
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return nbytes;
}

static int64_t
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return len;
}
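/*
 * Flush is handled synchronously: fsync(2) runs on the submitting thread and
 * the bdev_io is completed immediately based on its return code.
 */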
static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
			      rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}
	spdk_io_device_unregister(fdisk, NULL);
	aio_free_disk(fdisk);
	return rc;
}

static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}

static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i = 0;
	enum spdk_bdev_io_status status;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);

	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		if (events[i].res != aio_task->len) {
			status = SPDK_BDEV_IO_STATUS_FAILED;
		} else {
			status = SPDK_BDEV_IO_STATUS_SUCCESS;
		}

		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
		aio_task->ch->io_inflight--;
	}

	return nr;
}
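/*
 * Group poller: runs on each SPDK thread owning a group channel and drains
 * completions from every AIO io_channel attached to it.
 */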
static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}

static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}
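/*
 * Dispatch a bdev_io to the matching handler. A negative return tells the
 * caller the I/O type is not supported so it can fail the request.
 */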
static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}

static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}

static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct = bdev_aio_destruct,
	.submit_request = bdev_aio_submit_request,
	.io_type_supported = bdev_aio_io_type_supported,
	.get_io_channel = bdev_aio_get_io_channel,
	.dump_info_json = bdev_aio_dump_info_json,
	.write_config_json = bdev_aio_write_json_config,
};

static void aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}
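/*
 * Group channel callbacks: a group channel is created for each SPDK thread
 * that opens an AIO io_channel. Creation registers the completion poller;
 * destruction unregisters it and expects all AIO io_channels to have detached.
 */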
static int
bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->io_ch_head);

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	return 0;
}

static void
bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
	}

	spdk_poller_unregister(&ch->poller);
}

int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
		rc = -ENOMEM;
		goto error_return;
	}

	if (bdev_aio_open(fdisk)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
		rc = -errno;
		goto error_return;
	}

	disk_size = spdk_fd_get_size(fdisk->fd);

	fdisk->disk.name = strdup(name);
	if (!fdisk->disk.name) {
		rc = -ENOMEM;
		goto error_return;
	}
	fdisk->disk.product_name = "AIO disk";
	fdisk->disk.module = &aio_if;

	fdisk->disk.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			rc = -EINVAL;
			goto error_return;
		}
		fdisk->block_size_override = false;
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			rc = -EINVAL;
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
		fdisk->block_size_override = true;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blocklen = block_size;
	if (fdisk->block_size_override && detected_block_size) {
		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
	} else {
		fdisk->disk.required_alignment = spdk_u32log2(block_size);
	}

	if (disk_size % fdisk->disk.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    disk_size, fdisk->disk.blocklen);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
	fdisk->disk.ctxt = fdisk;

	fdisk->disk.fn_table = &aio_fn_table;
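	/* Register the io_device backing the per-thread AIO channels before
	 * registering the bdev itself; unwind the io_device if bdev registration
	 * fails. */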
	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
				sizeof(struct bdev_aio_io_channel),
				fdisk->disk.name);
	rc = spdk_bdev_register(&fdisk->disk);
	if (rc) {
		spdk_io_device_unregister(fdisk, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
	return 0;

error_return:
	bdev_aio_close(fdisk);
	aio_free_disk(fdisk);
	return rc;
}

struct delete_aio_bdev_ctx {
	delete_aio_bdev_complete cb_fn;
	void *cb_arg;
};

static void
aio_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_aio_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg)
{
	struct delete_aio_bdev_ctx *ctx;

	if (!bdev || bdev->module != &aio_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx);
}

static int
bdev_aio_initialize(void)
{
	size_t i;
	struct spdk_conf_section *sp;
	int rc = 0;

	TAILQ_INIT(&g_aio_disk_head);
	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
				sizeof(struct bdev_aio_group_channel),
				"aio_module");

	sp = spdk_conf_find_section(NULL, "AIO");
	if (!sp) {
		return 0;
	}

	i = 0;
	while (true) {
		const char *file;
		const char *name;
		const char *block_size_str;
		uint32_t block_size = 0;
		long int tmp;

		file = spdk_conf_section_get_nmval(sp, "AIO", i, 0);
		if (!file) {
			break;
		}

		name = spdk_conf_section_get_nmval(sp, "AIO", i, 1);
		if (!name) {
			SPDK_ERRLOG("No name provided for AIO disk with file %s\n", file);
			i++;
			continue;
		}

		block_size_str = spdk_conf_section_get_nmval(sp, "AIO", i, 2);
		if (block_size_str) {
			tmp = spdk_strtol(block_size_str, 10);
			if (tmp < 0) {
				SPDK_ERRLOG("Invalid block size for AIO disk with file %s\n", file);
				i++;
				continue;
			}
			block_size = (uint32_t)tmp;
		}

		rc = create_aio_bdev(name, file, block_size);
		if (rc) {
			SPDK_ERRLOG("Unable to create AIO bdev from file %s, err is %s\n", file, spdk_strerror(-rc));
		}

		i++;
	}

	return 0;
}

static void
bdev_aio_fini(void)
{
	spdk_io_device_unregister(&aio_if, NULL);
}

static void
bdev_aio_get_spdk_running_config(FILE *fp)
{
	char *file;
	char *name;
	uint32_t block_size;
	struct file_disk *fdisk;

	fprintf(fp,
		"\n"
		"# Users must change this section to match the /dev/sdX devices to be\n"
		"# exported as iSCSI LUNs. The devices are accessed using Linux AIO.\n"
		"# The format is:\n"
		"# AIO <file name> <bdev name> [<block size>]\n"
		"# The file name is the backing device\n"
		"# The bdev name can be referenced from elsewhere in the configuration file.\n"
		"# Block size may be omitted to automatically detect the block size of a disk.\n"
		"[AIO]\n");

	TAILQ_FOREACH(fdisk, &g_aio_disk_head, link) {
		file = fdisk->filename;
		name = fdisk->disk.name;
		block_size = fdisk->disk.blocklen;
		fprintf(fp, " AIO %s %s ", file, name);
		if (fdisk->block_size_override) {
			fprintf(fp, "%d", block_size);
		}
		fprintf(fp, "\n");
	}
	fprintf(fp, "\n");
}

SPDK_LOG_REGISTER_COMPONENT(aio)