xref: /spdk/module/bdev/aio/bdev_aio.c (revision 2172c432cfdaecc5a279d64e37c6b51e794683c1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "bdev_aio.h"
35 
36 #include "spdk/stdinc.h"
37 
38 #include "spdk/barrier.h"
39 #include "spdk/bdev.h"
40 #include "spdk/bdev_module.h"
41 #include "spdk/conf.h"
42 #include "spdk/env.h"
43 #include "spdk/fd.h"
44 #include "spdk/likely.h"
45 #include "spdk/thread.h"
46 #include "spdk/json.h"
47 #include "spdk/util.h"
48 #include "spdk/string.h"
49 
50 #include "spdk_internal/log.h"
51 
52 #include <libaio.h>
53 
/* Per-channel AIO state; this is the channel context of a file_disk io_device. */
struct bdev_aio_io_channel {
	/* Requests submitted with io_submit() whose completion has not been reaped yet. */
	uint64_t				io_inflight;
	/* libaio context created by io_setup() for this channel. */
	io_context_t				io_ctx;
	/* Group channel whose poller reaps completions for this channel. */
	struct bdev_aio_group_channel		*group_ch;
	/* Linkage in bdev_aio_group_channel.io_ch_head. */
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};
60 
/* Channel context of the module-wide io_device (&aio_if); groups the AIO channels that are polled together. */
struct bdev_aio_group_channel {
	/* Poller running bdev_aio_group_poll() for this group. */
	struct spdk_poller			*poller;
	/* AIO I/O channels currently attached to this group. */
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};
65 
/* Per-I/O driver context; its size is what bdev_aio_get_ctx_size() reports. */
struct bdev_aio_task {
	/* Control block handed to io_submit(); its data pointer refers back to this task. */
	struct iocb			iocb;
	/* Expected byte count, compared against the completion result. */
	uint64_t			len;
	/* Channel the request was submitted on. */
	struct bdev_aio_io_channel	*ch;
	/* NOTE(review): list linkage that is not referenced elsewhere in this file. */
	TAILQ_ENTRY(bdev_aio_task)	link;
};
72 
/* One AIO-backed bdev together with the file that backs it. */
struct file_disk {
	/* Task of the reset request currently being processed. */
	struct bdev_aio_task	*reset_task;
	/* Poller used to retry a reset while I/O is still in flight. */
	struct spdk_poller	*reset_retry_timer;
	/* The bdev that is registered with the bdev layer. */
	struct spdk_bdev	disk;
	/* Path of the backing file (heap-allocated copy, released by aio_free_disk()). */
	char			*filename;
	/* Descriptor of the backing file; -1 when it is not open. */
	int			fd;
	/* Linkage in g_aio_disk_head. */
	TAILQ_ENTRY(file_disk)  link;
	/* True when the caller supplied the block size instead of relying on auto-detection. */
	bool			block_size_override;
};
82 
/* For user space reaping of completions.
 *
 * bdev_user_io_getevents() reinterprets an io_context_t as this layout so
 * that completion events can be read without a system call.
 * NOTE(review): presumably this mirrors the kernel's AIO ring header - confirm
 * against the kernel definition before changing any field.
 */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;	/* Number of event slots in the ring. */
	uint32_t head;	/* Index of the next event to consume; advanced by the reader. */
	uint32_t tail;	/* Index one past the most recent event. */

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;	/* Byte offset from the start of the ring to the event array. */
};

/* Value spdk_aio_ring.version must hold for the user-space reaping path to be used. */
#define SPDK_AIO_RING_VERSION	0xa10a10a1
97 
/* Forward declarations for functions that are referenced before their definitions. */
static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static void bdev_aio_get_spdk_running_config(FILE *fp);
/* Every AIO disk successfully created by create_aio_bdev(). */
static TAILQ_HEAD(, file_disk) g_aio_disk_head;

/* Queue depth passed to io_setup() and the maximum number of events reaped per channel poll. */
#define SPDK_AIO_QUEUE_DEPTH 128
/* NOTE(review): not referenced anywhere in this file. */
#define MAX_EVENTS_PER_POLL 32
106 
107 static int
108 bdev_aio_get_ctx_size(void)
109 {
110 	return sizeof(struct bdev_aio_task);
111 }
112 
/* Module descriptor: hooks this file's init, fini, config-dump and context-size callbacks into the bdev layer. */
static struct spdk_bdev_module aio_if = {
	.name		= "aio",
	.module_init	= bdev_aio_initialize,
	.module_fini	= bdev_aio_fini,
	.config_text	= bdev_aio_get_spdk_running_config,
	.get_ctx_size	= bdev_aio_get_ctx_size,
};

/* Register the descriptor above as the "aio" bdev module. */
SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)
122 
123 static int
124 bdev_aio_open(struct file_disk *disk)
125 {
126 	int fd;
127 
128 	fd = open(disk->filename, O_RDWR | O_DIRECT);
129 	if (fd < 0) {
130 		/* Try without O_DIRECT for non-disk files */
131 		fd = open(disk->filename, O_RDWR);
132 		if (fd < 0) {
133 			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
134 				    disk->filename, errno, spdk_strerror(errno));
135 			disk->fd = -1;
136 			return -1;
137 		}
138 	}
139 
140 	disk->fd = fd;
141 
142 	return 0;
143 }
144 
145 static int
146 bdev_aio_close(struct file_disk *disk)
147 {
148 	int rc;
149 
150 	if (disk->fd == -1) {
151 		return 0;
152 	}
153 
154 	rc = close(disk->fd);
155 	if (rc < 0) {
156 		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
157 			    disk->fd, errno, spdk_strerror(errno));
158 		return -1;
159 	}
160 
161 	disk->fd = -1;
162 
163 	return 0;
164 }
165 
166 static int64_t
167 bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
168 	       struct bdev_aio_task *aio_task,
169 	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
170 {
171 	struct iocb *iocb = &aio_task->iocb;
172 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
173 	int rc;
174 
175 	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
176 	iocb->data = aio_task;
177 	aio_task->len = nbytes;
178 	aio_task->ch = aio_ch;
179 
180 	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
181 		      iovcnt, nbytes, offset);
182 
183 	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
184 	if (rc < 0) {
185 		if (rc == -EAGAIN) {
186 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
187 		} else {
188 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
189 			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
190 		}
191 		return -1;
192 	}
193 	aio_ch->io_inflight++;
194 	return nbytes;
195 }
196 
197 static int64_t
198 bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
199 		struct bdev_aio_task *aio_task,
200 		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
201 {
202 	struct iocb *iocb = &aio_task->iocb;
203 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
204 	int rc;
205 
206 	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
207 	iocb->data = aio_task;
208 	aio_task->len = len;
209 	aio_task->ch = aio_ch;
210 
211 	SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
212 		      iovcnt, len, offset);
213 
214 	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
215 	if (rc < 0) {
216 		if (rc == -EAGAIN) {
217 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
218 		} else {
219 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
220 			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
221 		}
222 		return -1;
223 	}
224 	aio_ch->io_inflight++;
225 	return len;
226 }
227 
228 static void
229 bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
230 {
231 	int rc = fsync(fdisk->fd);
232 
233 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
234 			      rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
235 }
236 
237 static int
238 bdev_aio_destruct(void *ctx)
239 {
240 	struct file_disk *fdisk = ctx;
241 	int rc = 0;
242 
243 	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
244 	rc = bdev_aio_close(fdisk);
245 	if (rc < 0) {
246 		SPDK_ERRLOG("bdev_aio_close() failed\n");
247 	}
248 	spdk_io_device_unregister(fdisk, NULL);
249 	aio_free_disk(fdisk);
250 	return rc;
251 }
252 
/*
 * Reap up to `max` completion events from `io_ctx` into `uevents`.
 *
 * The context is reinterpreted as a struct spdk_aio_ring. If its version or
 * incompat_features fields do not hold the expected values, the function
 * falls back to a non-blocking io_getevents() call (min_nr 0, zero timeout).
 * Otherwise events are copied directly out of the ring and the ring's head
 * index is advanced, without making a system call.
 *
 * Returns the number of events stored in `uevents`, or the return value of
 * io_getevents() on the fallback path.
 */
static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	/* Unknown ring layout: do not touch the ring, ask libaio instead. */
	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring.
	 * When tail has wrapped around, the unsigned subtraction underflows
	 * and adding the ring size brings it back into range. */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required: up to the end of the ring, then from its start */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}
315 
316 static int
317 bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
318 {
319 	int nr, i = 0;
320 	enum spdk_bdev_io_status status;
321 	struct bdev_aio_task *aio_task;
322 	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
323 
324 	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
325 
326 	if (nr < 0) {
327 		return 0;
328 	}
329 
330 	for (i = 0; i < nr; i++) {
331 		aio_task = events[i].data;
332 		if (events[i].res != aio_task->len) {
333 			status = SPDK_BDEV_IO_STATUS_FAILED;
334 		} else {
335 			status = SPDK_BDEV_IO_STATUS_SUCCESS;
336 		}
337 
338 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
339 		aio_task->ch->io_inflight--;
340 	}
341 
342 	return nr;
343 }
344 
345 static int
346 bdev_aio_group_poll(void *arg)
347 {
348 	struct bdev_aio_group_channel *group_ch = arg;
349 	struct bdev_aio_io_channel *io_ch;
350 	int nr = 0;
351 
352 	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
353 		nr += bdev_aio_io_channel_poll(io_ch);
354 	}
355 
356 	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
357 }
358 
359 static void
360 _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
361 {
362 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
363 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
364 
365 	if (aio_ch->io_inflight) {
366 		spdk_for_each_channel_continue(i, -1);
367 		return;
368 	}
369 
370 	spdk_for_each_channel_continue(i, 0);
371 }
372 
373 static int bdev_aio_reset_retry_timer(void *arg);
374 
375 static void
376 _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
377 {
378 	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);
379 
380 	if (status == -1) {
381 		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
382 		return;
383 	}
384 
385 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
386 }
387 
388 static int
389 bdev_aio_reset_retry_timer(void *arg)
390 {
391 	struct file_disk *fdisk = arg;
392 
393 	if (fdisk->reset_retry_timer) {
394 		spdk_poller_unregister(&fdisk->reset_retry_timer);
395 	}
396 
397 	spdk_for_each_channel(fdisk,
398 			      _bdev_aio_get_io_inflight,
399 			      fdisk,
400 			      _bdev_aio_get_io_inflight_done);
401 
402 	return SPDK_POLLER_BUSY;
403 }
404 
405 static void
406 bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
407 {
408 	fdisk->reset_task = aio_task;
409 
410 	bdev_aio_reset_retry_timer(fdisk);
411 }
412 
413 static void
414 bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
415 		    bool success)
416 {
417 	if (!success) {
418 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
419 		return;
420 	}
421 
422 	switch (bdev_io->type) {
423 	case SPDK_BDEV_IO_TYPE_READ:
424 		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
425 			       ch,
426 			       (struct bdev_aio_task *)bdev_io->driver_ctx,
427 			       bdev_io->u.bdev.iovs,
428 			       bdev_io->u.bdev.iovcnt,
429 			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
430 			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
431 		break;
432 	case SPDK_BDEV_IO_TYPE_WRITE:
433 		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
434 				ch,
435 				(struct bdev_aio_task *)bdev_io->driver_ctx,
436 				bdev_io->u.bdev.iovs,
437 				bdev_io->u.bdev.iovcnt,
438 				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
439 				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
440 		break;
441 	default:
442 		SPDK_ERRLOG("Wrong io type\n");
443 		break;
444 	}
445 }
446 
447 static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
448 {
449 	switch (bdev_io->type) {
450 	/* Read and write operations must be performed on buffers aligned to
451 	 * bdev->required_alignment. If user specified unaligned buffers,
452 	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
453 	case SPDK_BDEV_IO_TYPE_READ:
454 	case SPDK_BDEV_IO_TYPE_WRITE:
455 		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
456 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
457 		return 0;
458 	case SPDK_BDEV_IO_TYPE_FLUSH:
459 		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
460 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
461 		return 0;
462 
463 	case SPDK_BDEV_IO_TYPE_RESET:
464 		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
465 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
466 		return 0;
467 	default:
468 		return -1;
469 	}
470 }
471 
472 static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
473 {
474 	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
475 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
476 	}
477 }
478 
479 static bool
480 bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
481 {
482 	switch (io_type) {
483 	case SPDK_BDEV_IO_TYPE_READ:
484 	case SPDK_BDEV_IO_TYPE_WRITE:
485 	case SPDK_BDEV_IO_TYPE_FLUSH:
486 	case SPDK_BDEV_IO_TYPE_RESET:
487 		return true;
488 
489 	default:
490 		return false;
491 	}
492 }
493 
494 static int
495 bdev_aio_create_cb(void *io_device, void *ctx_buf)
496 {
497 	struct bdev_aio_io_channel *ch = ctx_buf;
498 
499 	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
500 		SPDK_ERRLOG("async I/O context setup failure\n");
501 		return -1;
502 	}
503 
504 	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
505 	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);
506 
507 	return 0;
508 }
509 
510 static void
511 bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
512 {
513 	struct bdev_aio_io_channel *ch = ctx_buf;
514 
515 	io_destroy(ch->io_ctx);
516 
517 	assert(ch->group_ch);
518 	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);
519 
520 	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
521 }
522 
/* get_io_channel entry of the function table: the file_disk passed as `ctx` is itself the io_device. */
static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *disk = ctx;
	struct spdk_io_channel *io_ch = spdk_get_io_channel(disk);

	return io_ch;
}
530 
531 
532 static int
533 bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
534 {
535 	struct file_disk *fdisk = ctx;
536 
537 	spdk_json_write_named_object_begin(w, "aio");
538 
539 	spdk_json_write_named_string(w, "filename", fdisk->filename);
540 
541 	spdk_json_write_object_end(w);
542 
543 	return 0;
544 }
545 
546 static void
547 bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
548 {
549 	struct file_disk *fdisk = bdev->ctxt;
550 
551 	spdk_json_write_object_begin(w);
552 
553 	spdk_json_write_named_string(w, "method", "bdev_aio_create");
554 
555 	spdk_json_write_named_object_begin(w, "params");
556 	spdk_json_write_named_string(w, "name", bdev->name);
557 	if (fdisk->block_size_override) {
558 		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
559 	}
560 	spdk_json_write_named_string(w, "filename", fdisk->filename);
561 	spdk_json_write_object_end(w);
562 
563 	spdk_json_write_object_end(w);
564 }
565 
/* Function table installed on every AIO bdev by create_aio_bdev(). */
static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct		= bdev_aio_destruct,
	.submit_request		= bdev_aio_submit_request,
	.io_type_supported	= bdev_aio_io_type_supported,
	.get_io_channel		= bdev_aio_get_io_channel,
	.dump_info_json		= bdev_aio_dump_info_json,
	.write_config_json	= bdev_aio_write_json_config,
};
574 
575 static void aio_free_disk(struct file_disk *fdisk)
576 {
577 	if (fdisk == NULL) {
578 		return;
579 	}
580 	free(fdisk->filename);
581 	free(fdisk->disk.name);
582 	free(fdisk);
583 }
584 
585 static int
586 bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
587 {
588 	struct bdev_aio_group_channel *ch = ctx_buf;
589 
590 	TAILQ_INIT(&ch->io_ch_head);
591 
592 	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
593 	return 0;
594 }
595 
596 static void
597 bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
598 {
599 	struct bdev_aio_group_channel *ch = ctx_buf;
600 
601 	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
602 		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
603 	}
604 
605 	spdk_poller_unregister(&ch->poller);
606 }
607 
608 int
609 create_aio_bdev(const char *name, const char *filename, uint32_t block_size)
610 {
611 	struct file_disk *fdisk;
612 	uint32_t detected_block_size;
613 	uint64_t disk_size;
614 	int rc;
615 
616 	fdisk = calloc(1, sizeof(*fdisk));
617 	if (!fdisk) {
618 		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
619 		return -ENOMEM;
620 	}
621 
622 	fdisk->filename = strdup(filename);
623 	if (!fdisk->filename) {
624 		rc = -ENOMEM;
625 		goto error_return;
626 	}
627 
628 	if (bdev_aio_open(fdisk)) {
629 		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
630 		rc = -errno;
631 		goto error_return;
632 	}
633 
634 	disk_size = spdk_fd_get_size(fdisk->fd);
635 
636 	fdisk->disk.name = strdup(name);
637 	if (!fdisk->disk.name) {
638 		rc = -ENOMEM;
639 		goto error_return;
640 	}
641 	fdisk->disk.product_name = "AIO disk";
642 	fdisk->disk.module = &aio_if;
643 
644 	fdisk->disk.write_cache = 1;
645 
646 	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
647 	if (block_size == 0) {
648 		/* User did not specify block size - use autodetected block size. */
649 		if (detected_block_size == 0) {
650 			SPDK_ERRLOG("Block size could not be auto-detected\n");
651 			rc = -EINVAL;
652 			goto error_return;
653 		}
654 		fdisk->block_size_override = false;
655 		block_size = detected_block_size;
656 	} else {
657 		if (block_size < detected_block_size) {
658 			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
659 				    "auto-detected block size %" PRIu32 "\n",
660 				    block_size, detected_block_size);
661 			rc = -EINVAL;
662 			goto error_return;
663 		} else if (detected_block_size != 0 && block_size != detected_block_size) {
664 			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
665 				     "auto-detected block size %" PRIu32 "\n",
666 				     block_size, detected_block_size);
667 		}
668 		fdisk->block_size_override = true;
669 	}
670 
671 	if (block_size < 512) {
672 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
673 		rc = -EINVAL;
674 		goto error_return;
675 	}
676 
677 	if (!spdk_u32_is_pow2(block_size)) {
678 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
679 		rc = -EINVAL;
680 		goto error_return;
681 	}
682 
683 	fdisk->disk.blocklen = block_size;
684 	if (fdisk->block_size_override && detected_block_size) {
685 		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
686 	} else {
687 		fdisk->disk.required_alignment = spdk_u32log2(block_size);
688 	}
689 
690 	if (disk_size % fdisk->disk.blocklen != 0) {
691 		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
692 			    disk_size, fdisk->disk.blocklen);
693 		rc = -EINVAL;
694 		goto error_return;
695 	}
696 
697 	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
698 	fdisk->disk.ctxt = fdisk;
699 
700 	fdisk->disk.fn_table = &aio_fn_table;
701 
702 	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
703 				sizeof(struct bdev_aio_io_channel),
704 				fdisk->disk.name);
705 	rc = spdk_bdev_register(&fdisk->disk);
706 	if (rc) {
707 		spdk_io_device_unregister(fdisk, NULL);
708 		goto error_return;
709 	}
710 
711 	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
712 	return 0;
713 
714 error_return:
715 	bdev_aio_close(fdisk);
716 	aio_free_disk(fdisk);
717 	return rc;
718 }
719 
/* Carries the caller's completion callback through the asynchronous bdev unregister. */
struct delete_aio_bdev_ctx {
	/* Callback to invoke once the unregister has finished. */
	delete_aio_bdev_complete cb_fn;
	/* Opaque argument passed back to cb_fn. */
	void *cb_arg;
};
724 
725 static void
726 aio_bdev_unregister_cb(void *arg, int bdeverrno)
727 {
728 	struct delete_aio_bdev_ctx *ctx = arg;
729 
730 	ctx->cb_fn(ctx->cb_arg, bdeverrno);
731 	free(ctx);
732 }
733 
734 void
735 bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg)
736 {
737 	struct delete_aio_bdev_ctx *ctx;
738 
739 	if (!bdev || bdev->module != &aio_if) {
740 		cb_fn(cb_arg, -ENODEV);
741 		return;
742 	}
743 
744 	ctx = calloc(1, sizeof(*ctx));
745 	if (ctx == NULL) {
746 		cb_fn(cb_arg, -ENOMEM);
747 		return;
748 	}
749 
750 	ctx->cb_fn = cb_fn;
751 	ctx->cb_arg = cb_arg;
752 	spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx);
753 }
754 
755 static int
756 bdev_aio_initialize(void)
757 {
758 	size_t i;
759 	struct spdk_conf_section *sp;
760 	int rc = 0;
761 
762 	TAILQ_INIT(&g_aio_disk_head);
763 	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
764 				sizeof(struct bdev_aio_group_channel),
765 				"aio_module");
766 
767 	sp = spdk_conf_find_section(NULL, "AIO");
768 	if (!sp) {
769 		return 0;
770 	}
771 
772 	i = 0;
773 	while (true) {
774 		const char *file;
775 		const char *name;
776 		const char *block_size_str;
777 		uint32_t block_size = 0;
778 		long int tmp;
779 
780 		file = spdk_conf_section_get_nmval(sp, "AIO", i, 0);
781 		if (!file) {
782 			break;
783 		}
784 
785 		name = spdk_conf_section_get_nmval(sp, "AIO", i, 1);
786 		if (!name) {
787 			SPDK_ERRLOG("No name provided for AIO disk with file %s\n", file);
788 			i++;
789 			continue;
790 		}
791 
792 		block_size_str = spdk_conf_section_get_nmval(sp, "AIO", i, 2);
793 		if (block_size_str) {
794 			tmp = spdk_strtol(block_size_str, 10);
795 			if (tmp < 0) {
796 				SPDK_ERRLOG("Invalid block size for AIO disk with file %s\n", file);
797 				i++;
798 				continue;
799 			}
800 			block_size = (uint32_t)tmp;
801 		}
802 
803 		rc = create_aio_bdev(name, file, block_size);
804 		if (rc) {
805 			SPDK_ERRLOG("Unable to create AIO bdev from file %s, err is %s\n", file, spdk_strerror(-rc));
806 		}
807 
808 		i++;
809 	}
810 
811 	return 0;
812 }
813 
814 static void
815 bdev_aio_fini(void)
816 {
817 	spdk_io_device_unregister(&aio_if, NULL);
818 }
819 
820 static void
821 bdev_aio_get_spdk_running_config(FILE *fp)
822 {
823 	char			*file;
824 	char			*name;
825 	uint32_t		block_size;
826 	struct file_disk	*fdisk;
827 
828 	fprintf(fp,
829 		"\n"
830 		"# Users must change this section to match the /dev/sdX devices to be\n"
831 		"# exported as iSCSI LUNs. The devices are accessed using Linux AIO.\n"
832 		"# The format is:\n"
833 		"# AIO <file name> <bdev name> [<block size>]\n"
834 		"# The file name is the backing device\n"
835 		"# The bdev name can be referenced from elsewhere in the configuration file.\n"
836 		"# Block size may be omitted to automatically detect the block size of a disk.\n"
837 		"[AIO]\n");
838 
839 	TAILQ_FOREACH(fdisk, &g_aio_disk_head, link) {
840 		file = fdisk->filename;
841 		name = fdisk->disk.name;
842 		block_size = fdisk->disk.blocklen;
843 		fprintf(fp, "  AIO %s %s ", file, name);
844 		if (fdisk->block_size_override) {
845 			fprintf(fp, "%d", block_size);
846 		}
847 		fprintf(fp, "\n");
848 	}
849 	fprintf(fp, "\n");
850 }
851 
/* Register the "aio" log component named by the SPDK_DEBUGLOG(aio, ...) calls in this file. */
SPDK_LOG_REGISTER_COMPONENT(aio)
853