xref: /spdk/module/bdev/aio/bdev_aio.c (revision 94a84ae98590bea46939eb1dcd7a9876bd393b54)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "bdev_aio.h"
35 
36 #include "spdk/stdinc.h"
37 
38 #include "spdk/barrier.h"
39 #include "spdk/bdev.h"
40 #include "spdk/bdev_module.h"
41 #include "spdk/conf.h"
42 #include "spdk/env.h"
43 #include "spdk/fd.h"
44 #include "spdk/likely.h"
45 #include "spdk/thread.h"
46 #include "spdk/json.h"
47 #include "spdk/util.h"
48 #include "spdk/string.h"
49 
50 #include "spdk_internal/log.h"
51 
52 #include <libaio.h>
53 
/* Context stored in each I/O channel of a file_disk io_device. */
struct bdev_aio_io_channel {
	/* I/Os accepted by io_submit() on this channel that the poller has not completed yet. */
	uint64_t io_inflight;
	/* Group channel (obtained from the aio_if io_device) whose AIO context is used for submission. */
	struct bdev_aio_group_channel *group_ch;
};
58 
59 struct bdev_aio_group_channel {
60 	struct spdk_poller			*poller;
61 	io_context_t				io_ctx;
62 };
63 
64 struct bdev_aio_task {
65 	struct iocb			iocb;
66 	uint64_t			len;
67 	struct bdev_aio_io_channel	*ch;
68 	TAILQ_ENTRY(bdev_aio_task)	link;
69 };
70 
71 struct file_disk {
72 	struct bdev_aio_task	*reset_task;
73 	struct spdk_poller	*reset_retry_timer;
74 	struct spdk_bdev	disk;
75 	char			*filename;
76 	int			fd;
77 	TAILQ_ENTRY(file_disk)  link;
78 	bool			block_size_override;
79 };
80 
/*
 * Header of the AIO completion ring, used to reap completions from user
 * space. bdev_user_io_getevents() overlays this layout on the io_context_t
 * value, so the field order and widths must not be changed.
 */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;			/* number of event slots in the ring */
	uint32_t head;			/* index of the next event to consume */
	uint32_t tail;			/* index one past the newest event */

	uint32_t version;		/* must equal SPDK_AIO_RING_VERSION */
	uint32_t compat_features;
	uint32_t incompat_features;	/* must be 0 for user space reaping */
	uint32_t header_length;		/* byte offset from the ring start to the event array */
};

/* Ring version value required before the ring is read directly from user space. */
#define SPDK_AIO_RING_VERSION	0xa10a10a1
95 
/* Forward declarations of functions defined further down in this file. */
static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static void bdev_aio_get_spdk_running_config(FILE *fp);

/* Every registered AIO disk, linked through file_disk.link. */
static TAILQ_HEAD(, file_disk) g_aio_disk_head;

/* Depth passed to io_setup() and maximum number of events reaped per poll. */
#define SPDK_AIO_QUEUE_DEPTH 128
/* Not referenced by the code in this file. */
#define MAX_EVENTS_PER_POLL 32
104 
105 static int
106 bdev_aio_get_ctx_size(void)
107 {
108 	return sizeof(struct bdev_aio_task);
109 }
110 
111 static struct spdk_bdev_module aio_if = {
112 	.name		= "aio",
113 	.module_init	= bdev_aio_initialize,
114 	.module_fini	= bdev_aio_fini,
115 	.config_text	= bdev_aio_get_spdk_running_config,
116 	.get_ctx_size	= bdev_aio_get_ctx_size,
117 };
118 
119 SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)
120 
121 static int
122 bdev_aio_open(struct file_disk *disk)
123 {
124 	int fd;
125 
126 	fd = open(disk->filename, O_RDWR | O_DIRECT);
127 	if (fd < 0) {
128 		/* Try without O_DIRECT for non-disk files */
129 		fd = open(disk->filename, O_RDWR);
130 		if (fd < 0) {
131 			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
132 				    disk->filename, errno, spdk_strerror(errno));
133 			disk->fd = -1;
134 			return -1;
135 		}
136 	}
137 
138 	disk->fd = fd;
139 
140 	return 0;
141 }
142 
143 static int
144 bdev_aio_close(struct file_disk *disk)
145 {
146 	int rc;
147 
148 	if (disk->fd == -1) {
149 		return 0;
150 	}
151 
152 	rc = close(disk->fd);
153 	if (rc < 0) {
154 		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
155 			    disk->fd, errno, spdk_strerror(errno));
156 		return -1;
157 	}
158 
159 	disk->fd = -1;
160 
161 	return 0;
162 }
163 
164 static int64_t
165 bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
166 	       struct bdev_aio_task *aio_task,
167 	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
168 {
169 	struct iocb *iocb = &aio_task->iocb;
170 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
171 	int rc;
172 
173 	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
174 	iocb->data = aio_task;
175 	aio_task->len = nbytes;
176 	aio_task->ch = aio_ch;
177 
178 	SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
179 		      iovcnt, nbytes, offset);
180 
181 	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
182 	if (rc < 0) {
183 		if (rc == -EAGAIN) {
184 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
185 		} else {
186 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
187 			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
188 		}
189 		return -1;
190 	}
191 	aio_ch->io_inflight++;
192 	return nbytes;
193 }
194 
195 static int64_t
196 bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
197 		struct bdev_aio_task *aio_task,
198 		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
199 {
200 	struct iocb *iocb = &aio_task->iocb;
201 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
202 	int rc;
203 
204 	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
205 	iocb->data = aio_task;
206 	aio_task->len = len;
207 	aio_task->ch = aio_ch;
208 
209 	SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
210 		      iovcnt, len, offset);
211 
212 	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
213 	if (rc < 0) {
214 		if (rc == -EAGAIN) {
215 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
216 		} else {
217 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
218 			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
219 		}
220 		return -1;
221 	}
222 	aio_ch->io_inflight++;
223 	return len;
224 }
225 
226 static void
227 bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
228 {
229 	int rc = fsync(fdisk->fd);
230 
231 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
232 			      rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
233 }
234 
235 static int
236 bdev_aio_destruct(void *ctx)
237 {
238 	struct file_disk *fdisk = ctx;
239 	int rc = 0;
240 
241 	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
242 	rc = bdev_aio_close(fdisk);
243 	if (rc < 0) {
244 		SPDK_ERRLOG("bdev_aio_close() failed\n");
245 	}
246 	spdk_io_device_unregister(fdisk, NULL);
247 	aio_free_disk(fdisk);
248 	return rc;
249 }
250 
251 static int
252 bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
253 {
254 	uint32_t head, tail, count;
255 	struct spdk_aio_ring *ring;
256 	struct timespec timeout;
257 	struct io_event *kevents;
258 
259 	ring = (struct spdk_aio_ring *)io_ctx;
260 
261 	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
262 		timeout.tv_sec = 0;
263 		timeout.tv_nsec = 0;
264 
265 		return io_getevents(io_ctx, 0, max, uevents, &timeout);
266 	}
267 
268 	/* Read the current state out of the ring */
269 	head = ring->head;
270 	tail = ring->tail;
271 
272 	/* This memory barrier is required to prevent the loads above
273 	 * from being re-ordered with stores to the events array
274 	 * potentially occurring on other threads. */
275 	spdk_smp_rmb();
276 
277 	/* Calculate how many items are in the circular ring */
278 	count = tail - head;
279 	if (tail < head) {
280 		count += ring->size;
281 	}
282 
283 	/* Reduce the count to the limit provided by the user */
284 	count = spdk_min(max, count);
285 
286 	/* Grab the memory location of the event array */
287 	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);
288 
289 	/* Copy the events out of the ring. */
290 	if ((head + count) <= ring->size) {
291 		/* Only one copy is required */
292 		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
293 	} else {
294 		uint32_t first_part = ring->size - head;
295 		/* Two copies are required */
296 		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
297 		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
298 	}
299 
300 	/* Update the head pointer. On x86, stores will not be reordered with older loads,
301 	 * so the copies out of the event array will always be complete prior to this
302 	 * update becoming visible. On other architectures this is not guaranteed, so
303 	 * add a barrier. */
304 #if defined(__i386__) || defined(__x86_64__)
305 	spdk_compiler_barrier();
306 #else
307 	spdk_smp_mb();
308 #endif
309 	ring->head = (head + count) % ring->size;
310 
311 	return count;
312 }
313 
314 static int
315 bdev_aio_group_poll(void *arg)
316 {
317 	struct bdev_aio_group_channel *group_ch = arg;
318 	int nr, i = 0;
319 	enum spdk_bdev_io_status status;
320 	struct bdev_aio_task *aio_task;
321 	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
322 
323 	nr = bdev_user_io_getevents(group_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
324 
325 	if (nr < 0) {
326 		return -1;
327 	}
328 
329 	for (i = 0; i < nr; i++) {
330 		aio_task = events[i].data;
331 		if (events[i].res != aio_task->len) {
332 			status = SPDK_BDEV_IO_STATUS_FAILED;
333 		} else {
334 			status = SPDK_BDEV_IO_STATUS_SUCCESS;
335 		}
336 
337 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
338 		aio_task->ch->io_inflight--;
339 	}
340 
341 	return nr;
342 }
343 
344 static void
345 _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
346 {
347 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
348 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
349 
350 	if (aio_ch->io_inflight) {
351 		spdk_for_each_channel_continue(i, -1);
352 		return;
353 	}
354 
355 	spdk_for_each_channel_continue(i, 0);
356 }
357 
358 static int bdev_aio_reset_retry_timer(void *arg);
359 
360 static void
361 _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
362 {
363 	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);
364 
365 	if (status == -1) {
366 		fdisk->reset_retry_timer = spdk_poller_register(bdev_aio_reset_retry_timer, fdisk, 500);
367 		return;
368 	}
369 
370 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
371 }
372 
373 static int
374 bdev_aio_reset_retry_timer(void *arg)
375 {
376 	struct file_disk *fdisk = arg;
377 
378 	if (fdisk->reset_retry_timer) {
379 		spdk_poller_unregister(&fdisk->reset_retry_timer);
380 	}
381 
382 	spdk_for_each_channel(fdisk,
383 			      _bdev_aio_get_io_inflight,
384 			      fdisk,
385 			      _bdev_aio_get_io_inflight_done);
386 
387 	return -1;
388 }
389 
390 static void
391 bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
392 {
393 	fdisk->reset_task = aio_task;
394 
395 	bdev_aio_reset_retry_timer(fdisk);
396 }
397 
398 static void
399 bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
400 		    bool success)
401 {
402 	if (!success) {
403 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
404 		return;
405 	}
406 
407 	switch (bdev_io->type) {
408 	case SPDK_BDEV_IO_TYPE_READ:
409 		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
410 			       ch,
411 			       (struct bdev_aio_task *)bdev_io->driver_ctx,
412 			       bdev_io->u.bdev.iovs,
413 			       bdev_io->u.bdev.iovcnt,
414 			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
415 			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
416 		break;
417 	case SPDK_BDEV_IO_TYPE_WRITE:
418 		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
419 				ch,
420 				(struct bdev_aio_task *)bdev_io->driver_ctx,
421 				bdev_io->u.bdev.iovs,
422 				bdev_io->u.bdev.iovcnt,
423 				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
424 				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
425 		break;
426 	default:
427 		SPDK_ERRLOG("Wrong io type\n");
428 		break;
429 	}
430 }
431 
432 static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
433 {
434 	switch (bdev_io->type) {
435 	/* Read and write operations must be performed on buffers aligned to
436 	 * bdev->required_alignment. If user specified unaligned buffers,
437 	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
438 	case SPDK_BDEV_IO_TYPE_READ:
439 	case SPDK_BDEV_IO_TYPE_WRITE:
440 		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
441 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
442 		return 0;
443 	case SPDK_BDEV_IO_TYPE_FLUSH:
444 		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
445 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
446 		return 0;
447 
448 	case SPDK_BDEV_IO_TYPE_RESET:
449 		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
450 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
451 		return 0;
452 	default:
453 		return -1;
454 	}
455 }
456 
457 static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
458 {
459 	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
460 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
461 	}
462 }
463 
464 static bool
465 bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
466 {
467 	switch (io_type) {
468 	case SPDK_BDEV_IO_TYPE_READ:
469 	case SPDK_BDEV_IO_TYPE_WRITE:
470 	case SPDK_BDEV_IO_TYPE_FLUSH:
471 	case SPDK_BDEV_IO_TYPE_RESET:
472 		return true;
473 
474 	default:
475 		return false;
476 	}
477 }
478 
479 static int
480 bdev_aio_create_cb(void *io_device, void *ctx_buf)
481 {
482 	struct bdev_aio_io_channel *ch = ctx_buf;
483 
484 	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
485 
486 	return 0;
487 }
488 
489 static void
490 bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
491 {
492 	struct bdev_aio_io_channel *ch = ctx_buf;
493 
494 	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
495 }
496 
/* get_io_channel entry of the function table: the file_disk itself is the io_device key. */
static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	return spdk_get_io_channel((struct file_disk *)ctx);
}
504 
505 
506 static int
507 bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
508 {
509 	struct file_disk *fdisk = ctx;
510 
511 	spdk_json_write_named_object_begin(w, "aio");
512 
513 	spdk_json_write_named_string(w, "filename", fdisk->filename);
514 
515 	spdk_json_write_object_end(w);
516 
517 	return 0;
518 }
519 
520 static void
521 bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
522 {
523 	struct file_disk *fdisk = bdev->ctxt;
524 
525 	spdk_json_write_object_begin(w);
526 
527 	spdk_json_write_named_string(w, "method", "bdev_aio_create");
528 
529 	spdk_json_write_named_object_begin(w, "params");
530 	spdk_json_write_named_string(w, "name", bdev->name);
531 	if (fdisk->block_size_override) {
532 		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
533 	}
534 	spdk_json_write_named_string(w, "filename", fdisk->filename);
535 	spdk_json_write_object_end(w);
536 
537 	spdk_json_write_object_end(w);
538 }
539 
540 static const struct spdk_bdev_fn_table aio_fn_table = {
541 	.destruct		= bdev_aio_destruct,
542 	.submit_request		= bdev_aio_submit_request,
543 	.io_type_supported	= bdev_aio_io_type_supported,
544 	.get_io_channel		= bdev_aio_get_io_channel,
545 	.dump_info_json		= bdev_aio_dump_info_json,
546 	.write_config_json	= bdev_aio_write_json_config,
547 };
548 
549 static void aio_free_disk(struct file_disk *fdisk)
550 {
551 	if (fdisk == NULL) {
552 		return;
553 	}
554 	free(fdisk->filename);
555 	free(fdisk->disk.name);
556 	free(fdisk);
557 }
558 
559 static int
560 bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
561 {
562 	struct bdev_aio_group_channel *ch = ctx_buf;
563 
564 	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
565 		SPDK_ERRLOG("async I/O context setup failure\n");
566 		return -1;
567 	}
568 
569 	ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0);
570 	return 0;
571 }
572 
573 static void
574 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
575 {
576 	struct bdev_aio_group_channel *ch = ctx_buf;
577 
578 	io_destroy(ch->io_ctx);
579 
580 	spdk_poller_unregister(&ch->poller);
581 }
582 
583 int
584 create_aio_bdev(const char *name, const char *filename, uint32_t block_size)
585 {
586 	struct file_disk *fdisk;
587 	uint32_t detected_block_size;
588 	uint64_t disk_size;
589 	int rc;
590 
591 	fdisk = calloc(1, sizeof(*fdisk));
592 	if (!fdisk) {
593 		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
594 		return -ENOMEM;
595 	}
596 
597 	fdisk->filename = strdup(filename);
598 	if (!fdisk->filename) {
599 		rc = -ENOMEM;
600 		goto error_return;
601 	}
602 
603 	if (bdev_aio_open(fdisk)) {
604 		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
605 		rc = -errno;
606 		goto error_return;
607 	}
608 
609 	disk_size = spdk_fd_get_size(fdisk->fd);
610 
611 	fdisk->disk.name = strdup(name);
612 	if (!fdisk->disk.name) {
613 		rc = -ENOMEM;
614 		goto error_return;
615 	}
616 	fdisk->disk.product_name = "AIO disk";
617 	fdisk->disk.module = &aio_if;
618 
619 	fdisk->disk.write_cache = 1;
620 
621 	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
622 	if (block_size == 0) {
623 		/* User did not specify block size - use autodetected block size. */
624 		if (detected_block_size == 0) {
625 			SPDK_ERRLOG("Block size could not be auto-detected\n");
626 			rc = -EINVAL;
627 			goto error_return;
628 		}
629 		fdisk->block_size_override = false;
630 		block_size = detected_block_size;
631 	} else {
632 		if (block_size < detected_block_size) {
633 			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
634 				    "auto-detected block size %" PRIu32 "\n",
635 				    block_size, detected_block_size);
636 			rc = -EINVAL;
637 			goto error_return;
638 		} else if (detected_block_size != 0 && block_size != detected_block_size) {
639 			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
640 				     "auto-detected block size %" PRIu32 "\n",
641 				     block_size, detected_block_size);
642 		}
643 		fdisk->block_size_override = true;
644 	}
645 
646 	if (block_size < 512) {
647 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
648 		rc = -EINVAL;
649 		goto error_return;
650 	}
651 
652 	if (!spdk_u32_is_pow2(block_size)) {
653 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
654 		rc = -EINVAL;
655 		goto error_return;
656 	}
657 
658 	fdisk->disk.blocklen = block_size;
659 	fdisk->disk.required_alignment = spdk_u32log2(block_size);
660 
661 	if (disk_size % fdisk->disk.blocklen != 0) {
662 		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
663 			    disk_size, fdisk->disk.blocklen);
664 		rc = -EINVAL;
665 		goto error_return;
666 	}
667 
668 	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
669 	fdisk->disk.ctxt = fdisk;
670 
671 	fdisk->disk.fn_table = &aio_fn_table;
672 
673 	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
674 				sizeof(struct bdev_aio_io_channel),
675 				fdisk->disk.name);
676 	rc = spdk_bdev_register(&fdisk->disk);
677 	if (rc) {
678 		spdk_io_device_unregister(fdisk, NULL);
679 		goto error_return;
680 	}
681 
682 	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
683 	return 0;
684 
685 error_return:
686 	bdev_aio_close(fdisk);
687 	aio_free_disk(fdisk);
688 	return rc;
689 }
690 
691 struct delete_aio_bdev_ctx {
692 	delete_aio_bdev_complete cb_fn;
693 	void *cb_arg;
694 };
695 
696 static void
697 aio_bdev_unregister_cb(void *arg, int bdeverrno)
698 {
699 	struct delete_aio_bdev_ctx *ctx = arg;
700 
701 	ctx->cb_fn(ctx->cb_arg, bdeverrno);
702 	free(ctx);
703 }
704 
705 void
706 bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg)
707 {
708 	struct delete_aio_bdev_ctx *ctx;
709 
710 	if (!bdev || bdev->module != &aio_if) {
711 		cb_fn(cb_arg, -ENODEV);
712 		return;
713 	}
714 
715 	ctx = calloc(1, sizeof(*ctx));
716 	if (ctx == NULL) {
717 		cb_fn(cb_arg, -ENOMEM);
718 		return;
719 	}
720 
721 	ctx->cb_fn = cb_fn;
722 	ctx->cb_arg = cb_arg;
723 	spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx);
724 }
725 
726 static int
727 bdev_aio_initialize(void)
728 {
729 	size_t i;
730 	struct spdk_conf_section *sp;
731 	int rc = 0;
732 
733 	TAILQ_INIT(&g_aio_disk_head);
734 	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
735 				sizeof(struct bdev_aio_group_channel),
736 				"aio_module");
737 
738 	sp = spdk_conf_find_section(NULL, "AIO");
739 	if (!sp) {
740 		return 0;
741 	}
742 
743 	i = 0;
744 	while (true) {
745 		const char *file;
746 		const char *name;
747 		const char *block_size_str;
748 		uint32_t block_size = 0;
749 		long int tmp;
750 
751 		file = spdk_conf_section_get_nmval(sp, "AIO", i, 0);
752 		if (!file) {
753 			break;
754 		}
755 
756 		name = spdk_conf_section_get_nmval(sp, "AIO", i, 1);
757 		if (!name) {
758 			SPDK_ERRLOG("No name provided for AIO disk with file %s\n", file);
759 			i++;
760 			continue;
761 		}
762 
763 		block_size_str = spdk_conf_section_get_nmval(sp, "AIO", i, 2);
764 		if (block_size_str) {
765 			tmp = spdk_strtol(block_size_str, 10);
766 			if (tmp < 0) {
767 				SPDK_ERRLOG("Invalid block size for AIO disk with file %s\n", file);
768 				i++;
769 				continue;
770 			}
771 			block_size = (uint32_t)tmp;
772 		}
773 
774 		rc = create_aio_bdev(name, file, block_size);
775 		if (rc) {
776 			SPDK_ERRLOG("Unable to create AIO bdev from file %s, err is %s\n", file, spdk_strerror(-rc));
777 		}
778 
779 		i++;
780 	}
781 
782 	return 0;
783 }
784 
785 static void
786 bdev_aio_fini(void)
787 {
788 	spdk_io_device_unregister(&aio_if, NULL);
789 }
790 
791 static void
792 bdev_aio_get_spdk_running_config(FILE *fp)
793 {
794 	char			*file;
795 	char			*name;
796 	uint32_t		block_size;
797 	struct file_disk	*fdisk;
798 
799 	fprintf(fp,
800 		"\n"
801 		"# Users must change this section to match the /dev/sdX devices to be\n"
802 		"# exported as iSCSI LUNs. The devices are accessed using Linux AIO.\n"
803 		"# The format is:\n"
804 		"# AIO <file name> <bdev name> [<block size>]\n"
805 		"# The file name is the backing device\n"
806 		"# The bdev name can be referenced from elsewhere in the configuration file.\n"
807 		"# Block size may be omitted to automatically detect the block size of a disk.\n"
808 		"[AIO]\n");
809 
810 	TAILQ_FOREACH(fdisk, &g_aio_disk_head, link) {
811 		file = fdisk->filename;
812 		name = fdisk->disk.name;
813 		block_size = fdisk->disk.blocklen;
814 		fprintf(fp, "  AIO %s %s ", file, name);
815 		if (fdisk->block_size_override) {
816 			fprintf(fp, "%d", block_size);
817 		}
818 		fprintf(fp, "\n");
819 	}
820 	fprintf(fp, "\n");
821 }
822 
823 SPDK_LOG_REGISTER_COMPONENT("aio", SPDK_LOG_AIO)
824