xref: /spdk/module/bdev/aio/bdev_aio.c (revision 3428322b812fe31cc3e1d0308a7f5bd4b06b9886)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse  *   Copyright (C) 2017 Intel Corporation.
307fe6a43SSeth Howell  *   All rights reserved.
4b5b75279SAlexey Marchuk  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
507fe6a43SSeth Howell  */
607fe6a43SSeth Howell 
707fe6a43SSeth Howell #include "bdev_aio.h"
807fe6a43SSeth Howell 
907fe6a43SSeth Howell #include "spdk/stdinc.h"
1007fe6a43SSeth Howell 
1107fe6a43SSeth Howell #include "spdk/barrier.h"
1207fe6a43SSeth Howell #include "spdk/bdev.h"
1307fe6a43SSeth Howell #include "spdk/bdev_module.h"
1407fe6a43SSeth Howell #include "spdk/env.h"
1507fe6a43SSeth Howell #include "spdk/fd.h"
1607fe6a43SSeth Howell #include "spdk/likely.h"
1707fe6a43SSeth Howell #include "spdk/thread.h"
1807fe6a43SSeth Howell #include "spdk/json.h"
1907fe6a43SSeth Howell #include "spdk/util.h"
2007fe6a43SSeth Howell #include "spdk/string.h"
2107fe6a43SSeth Howell 
224e8e97c8STomasz Zawadzki #include "spdk/log.h"
2307fe6a43SSeth Howell 
24ad8c05dcSLiu Xiaodong #include <sys/eventfd.h>
259e06b192SFedor Uporov 
269e06b192SFedor Uporov #ifndef __FreeBSD__
2707fe6a43SSeth Howell #include <libaio.h>
289e06b192SFedor Uporov #endif
2907fe6a43SSeth Howell 
3007fe6a43SSeth Howell struct bdev_aio_io_channel {
3107fe6a43SSeth Howell 	uint64_t				io_inflight;
329e06b192SFedor Uporov #ifdef __FreeBSD__
339e06b192SFedor Uporov 	int					kqfd;
349e06b192SFedor Uporov #else
35195fb4e4SXiaodong Liu 	io_context_t				io_ctx;
369e06b192SFedor Uporov #endif
3707fe6a43SSeth Howell 	struct bdev_aio_group_channel		*group_ch;
38195fb4e4SXiaodong Liu 	TAILQ_ENTRY(bdev_aio_io_channel)	link;
3907fe6a43SSeth Howell };
4007fe6a43SSeth Howell 
/* State shared by a set of io channels that are polled together. */
struct bdev_aio_group_channel {
	/* eventfd for io completion notification in interrupt mode.
	 * Negative value like '-1' indicates it is invalid or unused.
	 */
	int					efd;
	struct spdk_interrupt			*intr;
	struct spdk_poller			*poller;
	/* Every io channel visited by bdev_aio_group_poll(). */
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};
5007fe6a43SSeth Howell 
5107fe6a43SSeth Howell struct bdev_aio_task {
529e06b192SFedor Uporov #ifdef __FreeBSD__
539e06b192SFedor Uporov 	struct aiocb			aiocb;
549e06b192SFedor Uporov #else
5507fe6a43SSeth Howell 	struct iocb			iocb;
569e06b192SFedor Uporov #endif
5707fe6a43SSeth Howell 	uint64_t			len;
5807fe6a43SSeth Howell 	struct bdev_aio_io_channel	*ch;
5907fe6a43SSeth Howell };
6007fe6a43SSeth Howell 
6107fe6a43SSeth Howell struct file_disk {
6207fe6a43SSeth Howell 	struct bdev_aio_task	*reset_task;
6307fe6a43SSeth Howell 	struct spdk_poller	*reset_retry_timer;
6407fe6a43SSeth Howell 	struct spdk_bdev	disk;
6507fe6a43SSeth Howell 	char			*filename;
6607fe6a43SSeth Howell 	int			fd;
67*3428322bSJinlong Chen #ifdef RWF_NOWAIT
684c446029SJim Harris 	bool			use_nowait;
69*3428322bSJinlong Chen #endif
7007fe6a43SSeth Howell 	TAILQ_ENTRY(file_disk)  link;
7107fe6a43SSeth Howell 	bool			block_size_override;
724c6a2e3dSYuhua 	bool			readonly;
73648a5414Szhenwei pi 	bool			fallocate;
7407fe6a43SSeth Howell };
7507fe6a43SSeth Howell 
/* For user space reaping of completions.
 *
 * bdev_user_io_getevents() overlays this header on the io_context_t and reads
 * completions from the event array located header_length bytes past it.
 * NOTE(review): the field order is presumably dictated by the kernel's ring
 * layout - do not reorder without confirming.
 */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;		/* number of slots in the event array */
	uint32_t head;		/* consumer index, advanced by bdev_user_io_getevents() */
	uint32_t tail;		/* producer index, only read here */

	uint32_t version;	/* must equal SPDK_AIO_RING_VERSION for the fast path */
	uint32_t compat_features;
	uint32_t incompat_features;	/* must be 0 for the fast path */
	uint32_t header_length;	/* byte offset from the ring start to the event array */
};
8807fe6a43SSeth Howell 
/* Ring version the user-space reaping path understands; any other value makes
 * bdev_user_io_getevents() fall back to io_getevents().
 */
#define SPDK_AIO_RING_VERSION	0xa10a10a1

/* Defined later in this file. */
static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);

/* List of file_disk instances; bdev_aio_destruct_cb() removes entries from it. */
static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

/* Upper bound on completion events reaped from one channel per poll. */
#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32
9807fe6a43SSeth Howell 
9907fe6a43SSeth Howell static int
10007fe6a43SSeth Howell bdev_aio_get_ctx_size(void)
10107fe6a43SSeth Howell {
10207fe6a43SSeth Howell 	return sizeof(struct bdev_aio_task);
10307fe6a43SSeth Howell }
10407fe6a43SSeth Howell 
10507fe6a43SSeth Howell static struct spdk_bdev_module aio_if = {
10607fe6a43SSeth Howell 	.name		= "aio",
10707fe6a43SSeth Howell 	.module_init	= bdev_aio_initialize,
10807fe6a43SSeth Howell 	.module_fini	= bdev_aio_fini,
10907fe6a43SSeth Howell 	.get_ctx_size	= bdev_aio_get_ctx_size,
11007fe6a43SSeth Howell };
11107fe6a43SSeth Howell 
11207fe6a43SSeth Howell SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)
11307fe6a43SSeth Howell 
11407fe6a43SSeth Howell static int
11507fe6a43SSeth Howell bdev_aio_open(struct file_disk *disk)
11607fe6a43SSeth Howell {
11707fe6a43SSeth Howell 	int fd;
1184c6a2e3dSYuhua 	int io_flag = disk->readonly ? O_RDONLY : O_RDWR;
119*3428322bSJinlong Chen #ifdef RWF_NOWAIT
1204c446029SJim Harris 	struct stat st;
121*3428322bSJinlong Chen #endif
12207fe6a43SSeth Howell 
1234c6a2e3dSYuhua 	fd = open(disk->filename, io_flag | O_DIRECT);
12407fe6a43SSeth Howell 	if (fd < 0) {
12507fe6a43SSeth Howell 		/* Try without O_DIRECT for non-disk files */
1264c6a2e3dSYuhua 		fd = open(disk->filename, io_flag);
12707fe6a43SSeth Howell 		if (fd < 0) {
12807fe6a43SSeth Howell 			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
12907fe6a43SSeth Howell 				    disk->filename, errno, spdk_strerror(errno));
13007fe6a43SSeth Howell 			disk->fd = -1;
13107fe6a43SSeth Howell 			return -1;
13207fe6a43SSeth Howell 		}
13307fe6a43SSeth Howell 	}
13407fe6a43SSeth Howell 
13507fe6a43SSeth Howell 	disk->fd = fd;
136*3428322bSJinlong Chen 
137*3428322bSJinlong Chen #ifdef RWF_NOWAIT
1384c446029SJim Harris 	/* Some aio operations can block, for example if number outstanding
1394c446029SJim Harris 	 * I/O exceeds number of block layer tags. But not all files can
1404c446029SJim Harris 	 * support RWF_NOWAIT flag. So use RWF_NOWAIT on block devices only.
1414c446029SJim Harris 	 */
1424c446029SJim Harris 	disk->use_nowait = fstat(fd, &st) == 0 && S_ISBLK(st.st_mode);
143*3428322bSJinlong Chen #endif
14407fe6a43SSeth Howell 
14507fe6a43SSeth Howell 	return 0;
14607fe6a43SSeth Howell }
14707fe6a43SSeth Howell 
14807fe6a43SSeth Howell static int
14907fe6a43SSeth Howell bdev_aio_close(struct file_disk *disk)
15007fe6a43SSeth Howell {
15107fe6a43SSeth Howell 	int rc;
15207fe6a43SSeth Howell 
15307fe6a43SSeth Howell 	if (disk->fd == -1) {
15407fe6a43SSeth Howell 		return 0;
15507fe6a43SSeth Howell 	}
15607fe6a43SSeth Howell 
15707fe6a43SSeth Howell 	rc = close(disk->fd);
15807fe6a43SSeth Howell 	if (rc < 0) {
15907fe6a43SSeth Howell 		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
16007fe6a43SSeth Howell 			    disk->fd, errno, spdk_strerror(errno));
16107fe6a43SSeth Howell 		return -1;
16207fe6a43SSeth Howell 	}
16307fe6a43SSeth Howell 
16407fe6a43SSeth Howell 	disk->fd = -1;
16507fe6a43SSeth Howell 
16607fe6a43SSeth Howell 	return 0;
16707fe6a43SSeth Howell }
16807fe6a43SSeth Howell 
1699e06b192SFedor Uporov #ifdef __FreeBSD__
1709e06b192SFedor Uporov static int
1719e06b192SFedor Uporov bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
1729e06b192SFedor Uporov 		   struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
1739e06b192SFedor Uporov 		   struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
1749e06b192SFedor Uporov {
1759e06b192SFedor Uporov 	struct aiocb *aiocb = &aio_task->aiocb;
1769e06b192SFedor Uporov 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
1779e06b192SFedor Uporov 
1789e06b192SFedor Uporov 	memset(aiocb, 0, sizeof(struct aiocb));
1799e06b192SFedor Uporov 	aiocb->aio_fildes = fdisk->fd;
1809e06b192SFedor Uporov 	aiocb->aio_iov = iov;
1819e06b192SFedor Uporov 	aiocb->aio_iovcnt = iovcnt;
1829e06b192SFedor Uporov 	aiocb->aio_offset = offset;
1839e06b192SFedor Uporov 	aiocb->aio_sigevent.sigev_notify_kqueue = aio_ch->kqfd;
1849e06b192SFedor Uporov 	aiocb->aio_sigevent.sigev_value.sival_ptr = aio_task;
1859e06b192SFedor Uporov 	aiocb->aio_sigevent.sigev_notify = SIGEV_KEVENT;
1869e06b192SFedor Uporov 
1879e06b192SFedor Uporov 	aio_task->len = nbytes;
1889e06b192SFedor Uporov 	aio_task->ch = aio_ch;
1899e06b192SFedor Uporov 
1909e06b192SFedor Uporov 	if (type == SPDK_BDEV_IO_TYPE_READ) {
1919e06b192SFedor Uporov 		return aio_readv(aiocb);
1929e06b192SFedor Uporov 	}
1939e06b192SFedor Uporov 
1949e06b192SFedor Uporov 	return aio_writev(aiocb);
1959e06b192SFedor Uporov }
1969e06b192SFedor Uporov #else
1979e06b192SFedor Uporov static int
1989e06b192SFedor Uporov bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
1999e06b192SFedor Uporov 		   struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
20007fe6a43SSeth Howell 		   struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
20107fe6a43SSeth Howell {
20207fe6a43SSeth Howell 	struct iocb *iocb = &aio_task->iocb;
20307fe6a43SSeth Howell 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
20407fe6a43SSeth Howell 
2059e06b192SFedor Uporov 	if (type == SPDK_BDEV_IO_TYPE_READ) {
20607fe6a43SSeth Howell 		io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
2079e06b192SFedor Uporov 	} else {
2089e06b192SFedor Uporov 		io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
2099e06b192SFedor Uporov 	}
2109e06b192SFedor Uporov 
2117f6afb7bSLiu Xiaodong 	if (aio_ch->group_ch->efd >= 0) {
212ad8c05dcSLiu Xiaodong 		io_set_eventfd(iocb, aio_ch->group_ch->efd);
213ad8c05dcSLiu Xiaodong 	}
21407fe6a43SSeth Howell 	iocb->data = aio_task;
215*3428322bSJinlong Chen #ifdef RWF_NOWAIT
2164c446029SJim Harris 	if (fdisk->use_nowait) {
2174c446029SJim Harris 		iocb->aio_rw_flags = RWF_NOWAIT;
2184c446029SJim Harris 	}
219*3428322bSJinlong Chen #endif
22007fe6a43SSeth Howell 	aio_task->len = nbytes;
22107fe6a43SSeth Howell 	aio_task->ch = aio_ch;
22207fe6a43SSeth Howell 
2239e06b192SFedor Uporov 	return io_submit(aio_ch->io_ctx, 1, &iocb);
22407fe6a43SSeth Howell }
2259e06b192SFedor Uporov #endif
22607fe6a43SSeth Howell 
227e90a98edSJim Harris static void
2289e06b192SFedor Uporov bdev_aio_rw(enum spdk_bdev_io_type type, struct file_disk *fdisk,
2299e06b192SFedor Uporov 	    struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
2309e06b192SFedor Uporov 	    struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
23107fe6a43SSeth Howell {
23207fe6a43SSeth Howell 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
23307fe6a43SSeth Howell 	int rc;
23407fe6a43SSeth Howell 
2359e06b192SFedor Uporov 	if (type == SPDK_BDEV_IO_TYPE_READ) {
2369e06b192SFedor Uporov 		SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
2379e06b192SFedor Uporov 			      iovcnt, nbytes, offset);
2389e06b192SFedor Uporov 	} else {
2392172c432STomasz Zawadzki 		SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
2409e06b192SFedor Uporov 			      iovcnt, nbytes, offset);
2419e06b192SFedor Uporov 	}
24207fe6a43SSeth Howell 
2439e06b192SFedor Uporov 	rc = bdev_aio_submit_io(type, fdisk, ch, aio_task, iov, iovcnt, nbytes, offset);
244e90a98edSJim Harris 	if (spdk_unlikely(rc < 0)) {
24507fe6a43SSeth Howell 		if (rc == -EAGAIN) {
24607fe6a43SSeth Howell 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
24707fe6a43SSeth Howell 		} else {
248021ff6edSLiu Xiaodong 			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
24907fe6a43SSeth Howell 			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
25007fe6a43SSeth Howell 		}
251e90a98edSJim Harris 	} else {
25207fe6a43SSeth Howell 		aio_ch->io_inflight++;
253e90a98edSJim Harris 	}
25407fe6a43SSeth Howell }
25507fe6a43SSeth Howell 
25607fe6a43SSeth Howell static void
25707fe6a43SSeth Howell bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
25807fe6a43SSeth Howell {
25907fe6a43SSeth Howell 	int rc = fsync(fdisk->fd);
26007fe6a43SSeth Howell 
261021ff6edSLiu Xiaodong 	if (rc == 0) {
262021ff6edSLiu Xiaodong 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
263021ff6edSLiu Xiaodong 	} else {
264021ff6edSLiu Xiaodong 		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
265021ff6edSLiu Xiaodong 	}
26607fe6a43SSeth Howell }
26707fe6a43SSeth Howell 
268648a5414Szhenwei pi #ifndef __FreeBSD__
269648a5414Szhenwei pi static void
270648a5414Szhenwei pi bdev_aio_fallocate(struct spdk_bdev_io *bdev_io, int mode)
271648a5414Szhenwei pi {
272648a5414Szhenwei pi 	struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt;
273648a5414Szhenwei pi 	struct bdev_aio_task *aio_task = (struct bdev_aio_task *)bdev_io->driver_ctx;
274648a5414Szhenwei pi 	uint64_t offset_bytes = bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen;
275648a5414Szhenwei pi 	uint64_t length_bytes = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
276648a5414Szhenwei pi 	int rc;
277648a5414Szhenwei pi 
278648a5414Szhenwei pi 	if (!fdisk->fallocate) {
279648a5414Szhenwei pi 		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -ENOTSUP);
280648a5414Szhenwei pi 		return;
281648a5414Szhenwei pi 	}
282648a5414Szhenwei pi 
283648a5414Szhenwei pi 	rc = fallocate(fdisk->fd, mode, offset_bytes, length_bytes);
284648a5414Szhenwei pi 	if (rc == 0) {
285648a5414Szhenwei pi 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
286648a5414Szhenwei pi 	} else {
287648a5414Szhenwei pi 		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
288648a5414Szhenwei pi 	}
289648a5414Szhenwei pi }
290648a5414Szhenwei pi 
291648a5414Szhenwei pi static void
292648a5414Szhenwei pi bdev_aio_unmap(struct spdk_bdev_io *bdev_io)
293648a5414Szhenwei pi {
294648a5414Szhenwei pi 	int mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
295648a5414Szhenwei pi 
296648a5414Szhenwei pi 	bdev_aio_fallocate(bdev_io, mode);
297648a5414Szhenwei pi }
29871eba7ffSzhenwei pi 
29971eba7ffSzhenwei pi 
30071eba7ffSzhenwei pi static void
30171eba7ffSzhenwei pi bdev_aio_write_zeros(struct spdk_bdev_io *bdev_io)
30271eba7ffSzhenwei pi {
30371eba7ffSzhenwei pi 	int mode = FALLOC_FL_ZERO_RANGE;
30471eba7ffSzhenwei pi 
30571eba7ffSzhenwei pi 	bdev_aio_fallocate(bdev_io, mode);
30671eba7ffSzhenwei pi }
307648a5414Szhenwei pi #endif
308648a5414Szhenwei pi 
309f99d46d5SZiye Yang static void
310f99d46d5SZiye Yang bdev_aio_destruct_cb(void *io_device)
31107fe6a43SSeth Howell {
312f99d46d5SZiye Yang 	struct file_disk *fdisk = io_device;
31307fe6a43SSeth Howell 	int rc = 0;
31407fe6a43SSeth Howell 
31507fe6a43SSeth Howell 	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
31607fe6a43SSeth Howell 	rc = bdev_aio_close(fdisk);
31707fe6a43SSeth Howell 	if (rc < 0) {
31807fe6a43SSeth Howell 		SPDK_ERRLOG("bdev_aio_close() failed\n");
31907fe6a43SSeth Howell 	}
32007fe6a43SSeth Howell 	aio_free_disk(fdisk);
321f99d46d5SZiye Yang }
322f99d46d5SZiye Yang 
323f99d46d5SZiye Yang static int
324f99d46d5SZiye Yang bdev_aio_destruct(void *ctx)
325f99d46d5SZiye Yang {
326f99d46d5SZiye Yang 	struct file_disk *fdisk = ctx;
327f99d46d5SZiye Yang 
328f99d46d5SZiye Yang 	spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);
329f99d46d5SZiye Yang 
330f99d46d5SZiye Yang 	return 0;
33107fe6a43SSeth Howell }
33207fe6a43SSeth Howell 
3339e06b192SFedor Uporov #ifdef __FreeBSD__
3349e06b192SFedor Uporov static int
3359e06b192SFedor Uporov bdev_user_io_getevents(int kq, unsigned int max, struct kevent *events)
3369e06b192SFedor Uporov {
3379e06b192SFedor Uporov 	struct timespec ts;
3389e06b192SFedor Uporov 	int count;
3399e06b192SFedor Uporov 
3409e06b192SFedor Uporov 	memset(events, 0, max * sizeof(struct kevent));
3419e06b192SFedor Uporov 	memset(&ts, 0, sizeof(ts));
3429e06b192SFedor Uporov 
3439e06b192SFedor Uporov 	count = kevent(kq, NULL, 0, events, max, &ts);
3449e06b192SFedor Uporov 	if (count < 0) {
3459e06b192SFedor Uporov 		SPDK_ERRLOG("failed to get kevents: %s.\n", spdk_strerror(errno));
3469e06b192SFedor Uporov 		return -errno;
3479e06b192SFedor Uporov 	}
3489e06b192SFedor Uporov 
3499e06b192SFedor Uporov 	return count;
3509e06b192SFedor Uporov }
3519e06b192SFedor Uporov 
3529e06b192SFedor Uporov static int
3539e06b192SFedor Uporov bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
3549e06b192SFedor Uporov {
3559e06b192SFedor Uporov 	int nr, i, res = 0;
3569e06b192SFedor Uporov 	struct bdev_aio_task *aio_task;
3579e06b192SFedor Uporov 	struct kevent events[SPDK_AIO_QUEUE_DEPTH];
3589e06b192SFedor Uporov 
3599e06b192SFedor Uporov 	nr = bdev_user_io_getevents(io_ch->kqfd, SPDK_AIO_QUEUE_DEPTH, events);
3609e06b192SFedor Uporov 	if (nr < 0) {
3619e06b192SFedor Uporov 		return 0;
3629e06b192SFedor Uporov 	}
3639e06b192SFedor Uporov 
3649e06b192SFedor Uporov 	for (i = 0; i < nr; i++) {
3659e06b192SFedor Uporov 		aio_task = events[i].udata;
3669e06b192SFedor Uporov 		aio_task->ch->io_inflight--;
3679e06b192SFedor Uporov 		if (aio_task == NULL) {
3689e06b192SFedor Uporov 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
3699e06b192SFedor Uporov 			break;
3709e06b192SFedor Uporov 		} else if ((uint64_t)aio_return(&aio_task->aiocb) == aio_task->len) {
3719e06b192SFedor Uporov 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
3729e06b192SFedor Uporov 		} else {
3739e06b192SFedor Uporov 			SPDK_ERRLOG("failed to complete aio: rc %d\n", aio_error(&aio_task->aiocb));
3749e06b192SFedor Uporov 			res = aio_error(&aio_task->aiocb);
3759e06b192SFedor Uporov 			if (res != 0) {
3769e06b192SFedor Uporov 				spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
3779e06b192SFedor Uporov 			} else {
3789e06b192SFedor Uporov 				spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
3799e06b192SFedor Uporov 			}
3809e06b192SFedor Uporov 		}
3819e06b192SFedor Uporov 	}
3829e06b192SFedor Uporov 
3839e06b192SFedor Uporov 	return nr;
3849e06b192SFedor Uporov }
3859e06b192SFedor Uporov #else
38607fe6a43SSeth Howell static int
38707fe6a43SSeth Howell bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
38807fe6a43SSeth Howell {
38907fe6a43SSeth Howell 	uint32_t head, tail, count;
39007fe6a43SSeth Howell 	struct spdk_aio_ring *ring;
39107fe6a43SSeth Howell 	struct timespec timeout;
39207fe6a43SSeth Howell 	struct io_event *kevents;
39307fe6a43SSeth Howell 
39407fe6a43SSeth Howell 	ring = (struct spdk_aio_ring *)io_ctx;
39507fe6a43SSeth Howell 
39607fe6a43SSeth Howell 	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
39707fe6a43SSeth Howell 		timeout.tv_sec = 0;
39807fe6a43SSeth Howell 		timeout.tv_nsec = 0;
39907fe6a43SSeth Howell 
40007fe6a43SSeth Howell 		return io_getevents(io_ctx, 0, max, uevents, &timeout);
40107fe6a43SSeth Howell 	}
40207fe6a43SSeth Howell 
40307fe6a43SSeth Howell 	/* Read the current state out of the ring */
40407fe6a43SSeth Howell 	head = ring->head;
40507fe6a43SSeth Howell 	tail = ring->tail;
40607fe6a43SSeth Howell 
40707fe6a43SSeth Howell 	/* This memory barrier is required to prevent the loads above
40807fe6a43SSeth Howell 	 * from being re-ordered with stores to the events array
40907fe6a43SSeth Howell 	 * potentially occurring on other threads. */
41007fe6a43SSeth Howell 	spdk_smp_rmb();
41107fe6a43SSeth Howell 
41207fe6a43SSeth Howell 	/* Calculate how many items are in the circular ring */
41307fe6a43SSeth Howell 	count = tail - head;
41407fe6a43SSeth Howell 	if (tail < head) {
41507fe6a43SSeth Howell 		count += ring->size;
41607fe6a43SSeth Howell 	}
41707fe6a43SSeth Howell 
41807fe6a43SSeth Howell 	/* Reduce the count to the limit provided by the user */
41907fe6a43SSeth Howell 	count = spdk_min(max, count);
42007fe6a43SSeth Howell 
42107fe6a43SSeth Howell 	/* Grab the memory location of the event array */
42207fe6a43SSeth Howell 	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);
42307fe6a43SSeth Howell 
42407fe6a43SSeth Howell 	/* Copy the events out of the ring. */
42507fe6a43SSeth Howell 	if ((head + count) <= ring->size) {
42607fe6a43SSeth Howell 		/* Only one copy is required */
42707fe6a43SSeth Howell 		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
42807fe6a43SSeth Howell 	} else {
42907fe6a43SSeth Howell 		uint32_t first_part = ring->size - head;
43007fe6a43SSeth Howell 		/* Two copies are required */
43107fe6a43SSeth Howell 		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
43207fe6a43SSeth Howell 		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
43307fe6a43SSeth Howell 	}
43407fe6a43SSeth Howell 
43507fe6a43SSeth Howell 	/* Update the head pointer. On x86, stores will not be reordered with older loads,
43607fe6a43SSeth Howell 	 * so the copies out of the event array will always be complete prior to this
43707fe6a43SSeth Howell 	 * update becoming visible. On other architectures this is not guaranteed, so
43807fe6a43SSeth Howell 	 * add a barrier. */
43907fe6a43SSeth Howell #if defined(__i386__) || defined(__x86_64__)
44007fe6a43SSeth Howell 	spdk_compiler_barrier();
44107fe6a43SSeth Howell #else
44207fe6a43SSeth Howell 	spdk_smp_mb();
44307fe6a43SSeth Howell #endif
44407fe6a43SSeth Howell 	ring->head = (head + count) % ring->size;
44507fe6a43SSeth Howell 
44607fe6a43SSeth Howell 	return count;
44707fe6a43SSeth Howell }
44807fe6a43SSeth Howell 
44907fe6a43SSeth Howell static int
450195fb4e4SXiaodong Liu bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
45107fe6a43SSeth Howell {
452488d6e84Sjun.ran 	int nr, i, res = 0;
45307fe6a43SSeth Howell 	struct bdev_aio_task *aio_task;
45407fe6a43SSeth Howell 	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
45507fe6a43SSeth Howell 
456195fb4e4SXiaodong Liu 	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
45707fe6a43SSeth Howell 	if (nr < 0) {
458195fb4e4SXiaodong Liu 		return 0;
45907fe6a43SSeth Howell 	}
46007fe6a43SSeth Howell 
46107fe6a43SSeth Howell 	for (i = 0; i < nr; i++) {
46207fe6a43SSeth Howell 		aio_task = events[i].data;
46307fe6a43SSeth Howell 		aio_task->ch->io_inflight--;
4649be660eaSLiu Xiaodong 		if (events[i].res == aio_task->len) {
465021ff6edSLiu Xiaodong 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
466021ff6edSLiu Xiaodong 		} else {
4679be660eaSLiu Xiaodong 			/* From aio_abi.h, io_event.res is defined __s64, negative errno
4689be660eaSLiu Xiaodong 			 * will be assigned to io_event.res for error situation.
4699be660eaSLiu Xiaodong 			 * But from libaio.h, io_event.res is defined unsigned long, so
4709be660eaSLiu Xiaodong 			 * convert it to signed value for error detection.
4719be660eaSLiu Xiaodong 			 */
472488d6e84Sjun.ran 			res = (int)events[i].res;
473488d6e84Sjun.ran 			if (res < 0) {
4744c446029SJim Harris 				if (res == -EAGAIN) {
4754c446029SJim Harris 					spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
476488d6e84Sjun.ran 				} else {
4774c446029SJim Harris 					SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
4784c446029SJim Harris 					spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
4794c446029SJim Harris 				}
4804c446029SJim Harris 			} else {
4814c446029SJim Harris 				SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
482488d6e84Sjun.ran 				spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
483488d6e84Sjun.ran 			}
484021ff6edSLiu Xiaodong 		}
48507fe6a43SSeth Howell 	}
48607fe6a43SSeth Howell 
487195fb4e4SXiaodong Liu 	return nr;
488195fb4e4SXiaodong Liu }
4899e06b192SFedor Uporov #endif
490195fb4e4SXiaodong Liu 
491195fb4e4SXiaodong Liu static int
492195fb4e4SXiaodong Liu bdev_aio_group_poll(void *arg)
493195fb4e4SXiaodong Liu {
494195fb4e4SXiaodong Liu 	struct bdev_aio_group_channel *group_ch = arg;
495195fb4e4SXiaodong Liu 	struct bdev_aio_io_channel *io_ch;
496195fb4e4SXiaodong Liu 	int nr = 0;
497195fb4e4SXiaodong Liu 
498195fb4e4SXiaodong Liu 	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
499195fb4e4SXiaodong Liu 		nr += bdev_aio_io_channel_poll(io_ch);
500195fb4e4SXiaodong Liu 	}
501195fb4e4SXiaodong Liu 
502eb05cbd6SMaciej Szwed 	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
50307fe6a43SSeth Howell }
50407fe6a43SSeth Howell 
505ad8c05dcSLiu Xiaodong static int
506ad8c05dcSLiu Xiaodong bdev_aio_group_interrupt(void *arg)
507ad8c05dcSLiu Xiaodong {
508ad8c05dcSLiu Xiaodong 	struct bdev_aio_group_channel *group_ch = arg;
509ad8c05dcSLiu Xiaodong 	int rc;
510ad8c05dcSLiu Xiaodong 	uint64_t num_events;
511ad8c05dcSLiu Xiaodong 
5127f6afb7bSLiu Xiaodong 	assert(group_ch->efd >= 0);
513ad8c05dcSLiu Xiaodong 
514ad8c05dcSLiu Xiaodong 	/* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH,
515ad8c05dcSLiu Xiaodong 	 * io_getevent should be called again to ensure all completed IO are processed.
516ad8c05dcSLiu Xiaodong 	 */
517ad8c05dcSLiu Xiaodong 	rc = read(group_ch->efd, &num_events, sizeof(num_events));
518ad8c05dcSLiu Xiaodong 	if (rc < 0) {
519ad8c05dcSLiu Xiaodong 		SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
520ad8c05dcSLiu Xiaodong 		return -errno;
521ad8c05dcSLiu Xiaodong 	}
522ad8c05dcSLiu Xiaodong 
523ad8c05dcSLiu Xiaodong 	if (num_events > SPDK_AIO_QUEUE_DEPTH) {
524ad8c05dcSLiu Xiaodong 		num_events -= SPDK_AIO_QUEUE_DEPTH;
52547962dc7SLiu Xiaodong 		rc = write(group_ch->efd, &num_events, sizeof(num_events));
52647962dc7SLiu Xiaodong 		if (rc < 0) {
527ad8c05dcSLiu Xiaodong 			SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
528ad8c05dcSLiu Xiaodong 		}
529ad8c05dcSLiu Xiaodong 	}
530ad8c05dcSLiu Xiaodong 
531ad8c05dcSLiu Xiaodong 	return bdev_aio_group_poll(group_ch);
532ad8c05dcSLiu Xiaodong }
533ad8c05dcSLiu Xiaodong 
53407fe6a43SSeth Howell static void
53507fe6a43SSeth Howell _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
53607fe6a43SSeth Howell {
53707fe6a43SSeth Howell 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
53807fe6a43SSeth Howell 	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
53907fe6a43SSeth Howell 
54007fe6a43SSeth Howell 	if (aio_ch->io_inflight) {
54107fe6a43SSeth Howell 		spdk_for_each_channel_continue(i, -1);
54207fe6a43SSeth Howell 		return;
54307fe6a43SSeth Howell 	}
54407fe6a43SSeth Howell 
54507fe6a43SSeth Howell 	spdk_for_each_channel_continue(i, 0);
54607fe6a43SSeth Howell }
54707fe6a43SSeth Howell 
54807fe6a43SSeth Howell static int bdev_aio_reset_retry_timer(void *arg);
54907fe6a43SSeth Howell 
55007fe6a43SSeth Howell static void
55107fe6a43SSeth Howell _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
55207fe6a43SSeth Howell {
55307fe6a43SSeth Howell 	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);
55407fe6a43SSeth Howell 
55507fe6a43SSeth Howell 	if (status == -1) {
556ab0bc5c2SShuhei Matsumoto 		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
55707fe6a43SSeth Howell 		return;
55807fe6a43SSeth Howell 	}
55907fe6a43SSeth Howell 
56007fe6a43SSeth Howell 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
56107fe6a43SSeth Howell }
56207fe6a43SSeth Howell 
56307fe6a43SSeth Howell static int
56407fe6a43SSeth Howell bdev_aio_reset_retry_timer(void *arg)
56507fe6a43SSeth Howell {
56607fe6a43SSeth Howell 	struct file_disk *fdisk = arg;
56707fe6a43SSeth Howell 
56807fe6a43SSeth Howell 	if (fdisk->reset_retry_timer) {
56907fe6a43SSeth Howell 		spdk_poller_unregister(&fdisk->reset_retry_timer);
57007fe6a43SSeth Howell 	}
57107fe6a43SSeth Howell 
57207fe6a43SSeth Howell 	spdk_for_each_channel(fdisk,
57307fe6a43SSeth Howell 			      _bdev_aio_get_io_inflight,
57407fe6a43SSeth Howell 			      fdisk,
57507fe6a43SSeth Howell 			      _bdev_aio_get_io_inflight_done);
57607fe6a43SSeth Howell 
577eb05cbd6SMaciej Szwed 	return SPDK_POLLER_BUSY;
57807fe6a43SSeth Howell }
57907fe6a43SSeth Howell 
58007fe6a43SSeth Howell static void
58107fe6a43SSeth Howell bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
58207fe6a43SSeth Howell {
58307fe6a43SSeth Howell 	fdisk->reset_task = aio_task;
58407fe6a43SSeth Howell 
58507fe6a43SSeth Howell 	bdev_aio_reset_retry_timer(fdisk);
58607fe6a43SSeth Howell }
58707fe6a43SSeth Howell 
58807fe6a43SSeth Howell static void
58907fe6a43SSeth Howell bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
59007fe6a43SSeth Howell 		    bool success)
59107fe6a43SSeth Howell {
59207fe6a43SSeth Howell 	if (!success) {
59307fe6a43SSeth Howell 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
59407fe6a43SSeth Howell 		return;
59507fe6a43SSeth Howell 	}
59607fe6a43SSeth Howell 
59707fe6a43SSeth Howell 	switch (bdev_io->type) {
59807fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_READ:
59907fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_WRITE:
6009e06b192SFedor Uporov 		bdev_aio_rw(bdev_io->type,
6019e06b192SFedor Uporov 			    (struct file_disk *)bdev_io->bdev->ctxt,
60207fe6a43SSeth Howell 			    ch,
60307fe6a43SSeth Howell 			    (struct bdev_aio_task *)bdev_io->driver_ctx,
60407fe6a43SSeth Howell 			    bdev_io->u.bdev.iovs,
60507fe6a43SSeth Howell 			    bdev_io->u.bdev.iovcnt,
60607fe6a43SSeth Howell 			    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
60707fe6a43SSeth Howell 			    bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
60807fe6a43SSeth Howell 		break;
60907fe6a43SSeth Howell 	default:
61007fe6a43SSeth Howell 		SPDK_ERRLOG("Wrong io type\n");
61107fe6a43SSeth Howell 		break;
61207fe6a43SSeth Howell 	}
61307fe6a43SSeth Howell }
61407fe6a43SSeth Howell 
6158dd1cd21SBen Walker static int
6168dd1cd21SBen Walker _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
61707fe6a43SSeth Howell {
6184c6a2e3dSYuhua 	struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt;
6194c6a2e3dSYuhua 
62007fe6a43SSeth Howell 	switch (bdev_io->type) {
62107fe6a43SSeth Howell 	/* Read and write operations must be performed on buffers aligned to
62207fe6a43SSeth Howell 	 * bdev->required_alignment. If user specified unaligned buffers,
62307fe6a43SSeth Howell 	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
62407fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_READ:
62507fe6a43SSeth Howell 		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
62607fe6a43SSeth Howell 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
62707fe6a43SSeth Howell 		return 0;
6284c6a2e3dSYuhua 	case SPDK_BDEV_IO_TYPE_WRITE:
6294c6a2e3dSYuhua 		if (fdisk->readonly) {
6304c6a2e3dSYuhua 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
6314c6a2e3dSYuhua 		} else {
6324c6a2e3dSYuhua 			spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
6334c6a2e3dSYuhua 					     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
6344c6a2e3dSYuhua 		}
6354c6a2e3dSYuhua 		return 0;
6364c6a2e3dSYuhua 
63707fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_FLUSH:
63807fe6a43SSeth Howell 		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
63907fe6a43SSeth Howell 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
64007fe6a43SSeth Howell 		return 0;
64107fe6a43SSeth Howell 
64207fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_RESET:
64307fe6a43SSeth Howell 		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
64407fe6a43SSeth Howell 			       (struct bdev_aio_task *)bdev_io->driver_ctx);
64507fe6a43SSeth Howell 		return 0;
646648a5414Szhenwei pi 
647648a5414Szhenwei pi #ifndef __FreeBSD__
648648a5414Szhenwei pi 	case SPDK_BDEV_IO_TYPE_UNMAP:
649648a5414Szhenwei pi 		bdev_aio_unmap(bdev_io);
650648a5414Szhenwei pi 		return 0;
65171eba7ffSzhenwei pi 
65271eba7ffSzhenwei pi 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
65371eba7ffSzhenwei pi 		bdev_aio_write_zeros(bdev_io);
65471eba7ffSzhenwei pi 		return 0;
655648a5414Szhenwei pi #endif
656648a5414Szhenwei pi 
65707fe6a43SSeth Howell 	default:
65807fe6a43SSeth Howell 		return -1;
65907fe6a43SSeth Howell 	}
66007fe6a43SSeth Howell }
66107fe6a43SSeth Howell 
6628dd1cd21SBen Walker static void
6638dd1cd21SBen Walker bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
66407fe6a43SSeth Howell {
66507fe6a43SSeth Howell 	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
66607fe6a43SSeth Howell 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
66707fe6a43SSeth Howell 	}
66807fe6a43SSeth Howell }
66907fe6a43SSeth Howell 
67007fe6a43SSeth Howell static bool
67107fe6a43SSeth Howell bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
67207fe6a43SSeth Howell {
673648a5414Szhenwei pi 	struct file_disk *fdisk = ctx;
674648a5414Szhenwei pi 
67507fe6a43SSeth Howell 	switch (io_type) {
67607fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_READ:
67707fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_WRITE:
67807fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_FLUSH:
67907fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_RESET:
68007fe6a43SSeth Howell 		return true;
68107fe6a43SSeth Howell 
682648a5414Szhenwei pi 	case SPDK_BDEV_IO_TYPE_UNMAP:
68371eba7ffSzhenwei pi 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
684648a5414Szhenwei pi 		return fdisk->fallocate;
685648a5414Szhenwei pi 
68607fe6a43SSeth Howell 	default:
68707fe6a43SSeth Howell 		return false;
68807fe6a43SSeth Howell 	}
68907fe6a43SSeth Howell }
69007fe6a43SSeth Howell 
6919e06b192SFedor Uporov #ifdef __FreeBSD__
69207fe6a43SSeth Howell static int
6939e06b192SFedor Uporov bdev_aio_create_io(struct bdev_aio_io_channel *ch)
69407fe6a43SSeth Howell {
6959e06b192SFedor Uporov 	ch->kqfd = kqueue();
6969e06b192SFedor Uporov 	if (ch->kqfd < 0) {
6979e06b192SFedor Uporov 		SPDK_ERRLOG("async I/O context setup failure: %s.\n", spdk_strerror(errno));
6989e06b192SFedor Uporov 		return -1;
6999e06b192SFedor Uporov 	}
70007fe6a43SSeth Howell 
7019e06b192SFedor Uporov 	return 0;
7029e06b192SFedor Uporov }
7039e06b192SFedor Uporov 
7049e06b192SFedor Uporov static void
7059e06b192SFedor Uporov bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
7069e06b192SFedor Uporov {
7079e06b192SFedor Uporov 	close(ch->kqfd);
7089e06b192SFedor Uporov }
7099e06b192SFedor Uporov #else
7109e06b192SFedor Uporov static int
7119e06b192SFedor Uporov bdev_aio_create_io(struct bdev_aio_io_channel *ch)
7129e06b192SFedor Uporov {
713195fb4e4SXiaodong Liu 	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
714488abc2bSJim Harris 		SPDK_ERRLOG("Async I/O context setup failure, likely due to exceeding kernel limit.\n");
715488abc2bSJim Harris 		SPDK_ERRLOG("This limit may be increased using 'sysctl -w fs.aio-max-nr'.\n");
716195fb4e4SXiaodong Liu 		return -1;
717195fb4e4SXiaodong Liu 	}
718195fb4e4SXiaodong Liu 
7199e06b192SFedor Uporov 	return 0;
7209e06b192SFedor Uporov }
7219e06b192SFedor Uporov 
7229e06b192SFedor Uporov static void
7239e06b192SFedor Uporov bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
7249e06b192SFedor Uporov {
7259e06b192SFedor Uporov 	io_destroy(ch->io_ctx);
7269e06b192SFedor Uporov }
7279e06b192SFedor Uporov #endif
7289e06b192SFedor Uporov 
7299e06b192SFedor Uporov static int
7309e06b192SFedor Uporov bdev_aio_create_cb(void *io_device, void *ctx_buf)
7319e06b192SFedor Uporov {
7329e06b192SFedor Uporov 	struct bdev_aio_io_channel *ch = ctx_buf;
7339e06b192SFedor Uporov 	int rc;
7349e06b192SFedor Uporov 
7359e06b192SFedor Uporov 	rc = bdev_aio_create_io(ch);
7369e06b192SFedor Uporov 	if (rc < 0) {
7379e06b192SFedor Uporov 		return rc;
7389e06b192SFedor Uporov 	}
7399e06b192SFedor Uporov 
74007fe6a43SSeth Howell 	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
741195fb4e4SXiaodong Liu 	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);
74207fe6a43SSeth Howell 
74307fe6a43SSeth Howell 	return 0;
74407fe6a43SSeth Howell }
74507fe6a43SSeth Howell 
74607fe6a43SSeth Howell static void
74707fe6a43SSeth Howell bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
74807fe6a43SSeth Howell {
74907fe6a43SSeth Howell 	struct bdev_aio_io_channel *ch = ctx_buf;
75007fe6a43SSeth Howell 
7519e06b192SFedor Uporov 	bdev_aio_destroy_io(ch);
752195fb4e4SXiaodong Liu 
753195fb4e4SXiaodong Liu 	assert(ch->group_ch);
754195fb4e4SXiaodong Liu 	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);
755195fb4e4SXiaodong Liu 
75607fe6a43SSeth Howell 	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
75707fe6a43SSeth Howell }
75807fe6a43SSeth Howell 
/*
 * get_io_channel entry of the function table. The bdev context is the
 * file_disk, which is also the io_device the channel is requested for.
 */
static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *disk = ctx;
	struct spdk_io_channel *io_ch = spdk_get_io_channel(disk);

	return io_ch;
}
76607fe6a43SSeth Howell 
76707fe6a43SSeth Howell 
76807fe6a43SSeth Howell static int
76907fe6a43SSeth Howell bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
77007fe6a43SSeth Howell {
77107fe6a43SSeth Howell 	struct file_disk *fdisk = ctx;
77207fe6a43SSeth Howell 
77307fe6a43SSeth Howell 	spdk_json_write_named_object_begin(w, "aio");
77407fe6a43SSeth Howell 
77507fe6a43SSeth Howell 	spdk_json_write_named_string(w, "filename", fdisk->filename);
77607fe6a43SSeth Howell 
7774c6a2e3dSYuhua 	spdk_json_write_named_bool(w, "block_size_override", fdisk->block_size_override);
7784c6a2e3dSYuhua 
7794c6a2e3dSYuhua 	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
7804c6a2e3dSYuhua 
781648a5414Szhenwei pi 	spdk_json_write_named_bool(w, "fallocate", fdisk->fallocate);
782648a5414Szhenwei pi 
78307fe6a43SSeth Howell 	spdk_json_write_object_end(w);
78407fe6a43SSeth Howell 
78507fe6a43SSeth Howell 	return 0;
78607fe6a43SSeth Howell }
78707fe6a43SSeth Howell 
78807fe6a43SSeth Howell static void
78907fe6a43SSeth Howell bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
79007fe6a43SSeth Howell {
79107fe6a43SSeth Howell 	struct file_disk *fdisk = bdev->ctxt;
792e5693d68SMateusz Kozlowski 	const struct spdk_uuid *uuid = spdk_bdev_get_uuid(bdev);
79307fe6a43SSeth Howell 
79407fe6a43SSeth Howell 	spdk_json_write_object_begin(w);
79507fe6a43SSeth Howell 
79607fe6a43SSeth Howell 	spdk_json_write_named_string(w, "method", "bdev_aio_create");
79707fe6a43SSeth Howell 
79807fe6a43SSeth Howell 	spdk_json_write_named_object_begin(w, "params");
79907fe6a43SSeth Howell 	spdk_json_write_named_string(w, "name", bdev->name);
80007fe6a43SSeth Howell 	if (fdisk->block_size_override) {
80107fe6a43SSeth Howell 		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
80207fe6a43SSeth Howell 	}
80307fe6a43SSeth Howell 	spdk_json_write_named_string(w, "filename", fdisk->filename);
8044c6a2e3dSYuhua 	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
805648a5414Szhenwei pi 	spdk_json_write_named_bool(w, "fallocate", fdisk->fallocate);
806e5693d68SMateusz Kozlowski 	if (!spdk_uuid_is_null(uuid)) {
807e5693d68SMateusz Kozlowski 		spdk_json_write_named_uuid(w, "uuid", uuid);
808e5693d68SMateusz Kozlowski 	}
80907fe6a43SSeth Howell 	spdk_json_write_object_end(w);
81007fe6a43SSeth Howell 
81107fe6a43SSeth Howell 	spdk_json_write_object_end(w);
81207fe6a43SSeth Howell }
81307fe6a43SSeth Howell 
81407fe6a43SSeth Howell static const struct spdk_bdev_fn_table aio_fn_table = {
81507fe6a43SSeth Howell 	.destruct		= bdev_aio_destruct,
81607fe6a43SSeth Howell 	.submit_request		= bdev_aio_submit_request,
81707fe6a43SSeth Howell 	.io_type_supported	= bdev_aio_io_type_supported,
81807fe6a43SSeth Howell 	.get_io_channel		= bdev_aio_get_io_channel,
81907fe6a43SSeth Howell 	.dump_info_json		= bdev_aio_dump_info_json,
82007fe6a43SSeth Howell 	.write_config_json	= bdev_aio_write_json_config,
82107fe6a43SSeth Howell };
82207fe6a43SSeth Howell 
8238dd1cd21SBen Walker static void
8248dd1cd21SBen Walker aio_free_disk(struct file_disk *fdisk)
82507fe6a43SSeth Howell {
82607fe6a43SSeth Howell 	if (fdisk == NULL) {
82707fe6a43SSeth Howell 		return;
82807fe6a43SSeth Howell 	}
82907fe6a43SSeth Howell 	free(fdisk->filename);
83007fe6a43SSeth Howell 	free(fdisk->disk.name);
83107fe6a43SSeth Howell 	free(fdisk);
83207fe6a43SSeth Howell }
83307fe6a43SSeth Howell 
83407fe6a43SSeth Howell static int
835ad8c05dcSLiu Xiaodong bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
836ad8c05dcSLiu Xiaodong {
837ad8c05dcSLiu Xiaodong 	int efd;
838ad8c05dcSLiu Xiaodong 
839ad8c05dcSLiu Xiaodong 	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
840ad8c05dcSLiu Xiaodong 	if (efd < 0) {
841ad8c05dcSLiu Xiaodong 		return -1;
842ad8c05dcSLiu Xiaodong 	}
843ad8c05dcSLiu Xiaodong 
844ad8c05dcSLiu Xiaodong 	ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
845ad8c05dcSLiu Xiaodong 	if (ch->intr == NULL) {
846ad8c05dcSLiu Xiaodong 		close(efd);
847ad8c05dcSLiu Xiaodong 		return -1;
848ad8c05dcSLiu Xiaodong 	}
849ad8c05dcSLiu Xiaodong 	ch->efd = efd;
850ad8c05dcSLiu Xiaodong 
851ad8c05dcSLiu Xiaodong 	return 0;
852ad8c05dcSLiu Xiaodong }
853ad8c05dcSLiu Xiaodong 
854ad8c05dcSLiu Xiaodong static void
855ad8c05dcSLiu Xiaodong bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
856ad8c05dcSLiu Xiaodong {
857ad8c05dcSLiu Xiaodong 	spdk_interrupt_unregister(&ch->intr);
858ad8c05dcSLiu Xiaodong 	close(ch->efd);
8597f6afb7bSLiu Xiaodong 	ch->efd = -1;
860ad8c05dcSLiu Xiaodong }
861ad8c05dcSLiu Xiaodong 
862ad8c05dcSLiu Xiaodong static int
86307fe6a43SSeth Howell bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
86407fe6a43SSeth Howell {
86507fe6a43SSeth Howell 	struct bdev_aio_group_channel *ch = ctx_buf;
8662d36f6f1SLiu Xiaodong 	int rc;
86707fe6a43SSeth Howell 
868195fb4e4SXiaodong Liu 	TAILQ_INIT(&ch->io_ch_head);
8697f6afb7bSLiu Xiaodong 	/* Initialize ch->efd to be invalid and unused. */
8707f6afb7bSLiu Xiaodong 	ch->efd = -1;
871ad8c05dcSLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
8722d36f6f1SLiu Xiaodong 		rc = bdev_aio_register_interrupt(ch);
8732d36f6f1SLiu Xiaodong 		if (rc < 0) {
8742d36f6f1SLiu Xiaodong 			SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n");
8752d36f6f1SLiu Xiaodong 			return rc;
8762d36f6f1SLiu Xiaodong 		}
877ad8c05dcSLiu Xiaodong 	}
878ad8c05dcSLiu Xiaodong 
879ab0bc5c2SShuhei Matsumoto 	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
8805c782a70SJim Harris 	spdk_poller_register_interrupt(ch->poller, NULL, NULL);
881ad8c05dcSLiu Xiaodong 
88207fe6a43SSeth Howell 	return 0;
88307fe6a43SSeth Howell }
88407fe6a43SSeth Howell 
88507fe6a43SSeth Howell static void
88607fe6a43SSeth Howell bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
88707fe6a43SSeth Howell {
88807fe6a43SSeth Howell 	struct bdev_aio_group_channel *ch = ctx_buf;
88907fe6a43SSeth Howell 
890195fb4e4SXiaodong Liu 	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
891195fb4e4SXiaodong Liu 		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
892195fb4e4SXiaodong Liu 	}
89307fe6a43SSeth Howell 
89407fe6a43SSeth Howell 	spdk_poller_unregister(&ch->poller);
8952d36f6f1SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
8962d36f6f1SLiu Xiaodong 		bdev_aio_unregister_interrupt(ch);
8972d36f6f1SLiu Xiaodong 	}
89807fe6a43SSeth Howell }
89907fe6a43SSeth Howell 
90007fe6a43SSeth Howell int
901648a5414Szhenwei pi create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly,
902e5693d68SMateusz Kozlowski 		bool fallocate, const struct spdk_uuid *uuid)
90307fe6a43SSeth Howell {
90407fe6a43SSeth Howell 	struct file_disk *fdisk;
90507fe6a43SSeth Howell 	uint32_t detected_block_size;
90607fe6a43SSeth Howell 	uint64_t disk_size;
90707fe6a43SSeth Howell 	int rc;
90807fe6a43SSeth Howell 
909648a5414Szhenwei pi #ifdef __FreeBSD__
910648a5414Szhenwei pi 	if (fallocate) {
911648a5414Szhenwei pi 		SPDK_ERRLOG("Unable to support fallocate on this platform\n");
912648a5414Szhenwei pi 		return -ENOTSUP;
913648a5414Szhenwei pi 	}
914648a5414Szhenwei pi #endif
915648a5414Szhenwei pi 
91607fe6a43SSeth Howell 	fdisk = calloc(1, sizeof(*fdisk));
91707fe6a43SSeth Howell 	if (!fdisk) {
91807fe6a43SSeth Howell 		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
91907fe6a43SSeth Howell 		return -ENOMEM;
92007fe6a43SSeth Howell 	}
9214c6a2e3dSYuhua 	fdisk->readonly = readonly;
922648a5414Szhenwei pi 	fdisk->fallocate = fallocate;
92307fe6a43SSeth Howell 
92407fe6a43SSeth Howell 	fdisk->filename = strdup(filename);
92507fe6a43SSeth Howell 	if (!fdisk->filename) {
92607fe6a43SSeth Howell 		rc = -ENOMEM;
92707fe6a43SSeth Howell 		goto error_return;
92807fe6a43SSeth Howell 	}
92907fe6a43SSeth Howell 
93007fe6a43SSeth Howell 	if (bdev_aio_open(fdisk)) {
93107fe6a43SSeth Howell 		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
93207fe6a43SSeth Howell 		rc = -errno;
93307fe6a43SSeth Howell 		goto error_return;
93407fe6a43SSeth Howell 	}
93507fe6a43SSeth Howell 
93607fe6a43SSeth Howell 	disk_size = spdk_fd_get_size(fdisk->fd);
93707fe6a43SSeth Howell 
93807fe6a43SSeth Howell 	fdisk->disk.name = strdup(name);
93907fe6a43SSeth Howell 	if (!fdisk->disk.name) {
94007fe6a43SSeth Howell 		rc = -ENOMEM;
94107fe6a43SSeth Howell 		goto error_return;
94207fe6a43SSeth Howell 	}
94307fe6a43SSeth Howell 	fdisk->disk.product_name = "AIO disk";
94407fe6a43SSeth Howell 	fdisk->disk.module = &aio_if;
94507fe6a43SSeth Howell 
94607fe6a43SSeth Howell 	fdisk->disk.write_cache = 1;
94707fe6a43SSeth Howell 
94807fe6a43SSeth Howell 	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
94907fe6a43SSeth Howell 	if (block_size == 0) {
95007fe6a43SSeth Howell 		/* User did not specify block size - use autodetected block size. */
95107fe6a43SSeth Howell 		if (detected_block_size == 0) {
95207fe6a43SSeth Howell 			SPDK_ERRLOG("Block size could not be auto-detected\n");
95307fe6a43SSeth Howell 			rc = -EINVAL;
95407fe6a43SSeth Howell 			goto error_return;
95507fe6a43SSeth Howell 		}
95607fe6a43SSeth Howell 		fdisk->block_size_override = false;
95707fe6a43SSeth Howell 		block_size = detected_block_size;
95807fe6a43SSeth Howell 	} else {
95907fe6a43SSeth Howell 		if (block_size < detected_block_size) {
96007fe6a43SSeth Howell 			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
96107fe6a43SSeth Howell 				    "auto-detected block size %" PRIu32 "\n",
96207fe6a43SSeth Howell 				    block_size, detected_block_size);
96307fe6a43SSeth Howell 			rc = -EINVAL;
96407fe6a43SSeth Howell 			goto error_return;
96507fe6a43SSeth Howell 		} else if (detected_block_size != 0 && block_size != detected_block_size) {
96607fe6a43SSeth Howell 			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
96707fe6a43SSeth Howell 				     "auto-detected block size %" PRIu32 "\n",
96807fe6a43SSeth Howell 				     block_size, detected_block_size);
96907fe6a43SSeth Howell 		}
97007fe6a43SSeth Howell 		fdisk->block_size_override = true;
97107fe6a43SSeth Howell 	}
97207fe6a43SSeth Howell 
97307fe6a43SSeth Howell 	if (block_size < 512) {
97407fe6a43SSeth Howell 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
97507fe6a43SSeth Howell 		rc = -EINVAL;
97607fe6a43SSeth Howell 		goto error_return;
97707fe6a43SSeth Howell 	}
97807fe6a43SSeth Howell 
97907fe6a43SSeth Howell 	if (!spdk_u32_is_pow2(block_size)) {
98007fe6a43SSeth Howell 		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
98107fe6a43SSeth Howell 		rc = -EINVAL;
98207fe6a43SSeth Howell 		goto error_return;
98307fe6a43SSeth Howell 	}
98407fe6a43SSeth Howell 
98507fe6a43SSeth Howell 	fdisk->disk.blocklen = block_size;
98629bbb9baSChangpeng Liu 	if (fdisk->block_size_override && detected_block_size) {
98729bbb9baSChangpeng Liu 		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
98829bbb9baSChangpeng Liu 	} else {
98907fe6a43SSeth Howell 		fdisk->disk.required_alignment = spdk_u32log2(block_size);
99029bbb9baSChangpeng Liu 	}
99107fe6a43SSeth Howell 
99207fe6a43SSeth Howell 	if (disk_size % fdisk->disk.blocklen != 0) {
99307fe6a43SSeth Howell 		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
99407fe6a43SSeth Howell 			    disk_size, fdisk->disk.blocklen);
99507fe6a43SSeth Howell 		rc = -EINVAL;
99607fe6a43SSeth Howell 		goto error_return;
99707fe6a43SSeth Howell 	}
99807fe6a43SSeth Howell 
99907fe6a43SSeth Howell 	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
100007fe6a43SSeth Howell 	fdisk->disk.ctxt = fdisk;
1001e5693d68SMateusz Kozlowski 	spdk_uuid_copy(&fdisk->disk.uuid, uuid);
100207fe6a43SSeth Howell 
100307fe6a43SSeth Howell 	fdisk->disk.fn_table = &aio_fn_table;
100407fe6a43SSeth Howell 
100507fe6a43SSeth Howell 	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
100607fe6a43SSeth Howell 				sizeof(struct bdev_aio_io_channel),
100707fe6a43SSeth Howell 				fdisk->disk.name);
100807fe6a43SSeth Howell 	rc = spdk_bdev_register(&fdisk->disk);
100907fe6a43SSeth Howell 	if (rc) {
101007fe6a43SSeth Howell 		spdk_io_device_unregister(fdisk, NULL);
101107fe6a43SSeth Howell 		goto error_return;
101207fe6a43SSeth Howell 	}
101307fe6a43SSeth Howell 
101407fe6a43SSeth Howell 	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
101507fe6a43SSeth Howell 	return 0;
101607fe6a43SSeth Howell 
101707fe6a43SSeth Howell error_return:
101807fe6a43SSeth Howell 	bdev_aio_close(fdisk);
101907fe6a43SSeth Howell 	aio_free_disk(fdisk);
102007fe6a43SSeth Howell 	return rc;
102107fe6a43SSeth Howell }
102207fe6a43SSeth Howell 
1023d77d3179SShuhei Matsumoto static void
1024d77d3179SShuhei Matsumoto dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
1025b635d19aSshuochen0311 {
1026d77d3179SShuhei Matsumoto }
1027d77d3179SShuhei Matsumoto 
1028d77d3179SShuhei Matsumoto int
1029d77d3179SShuhei Matsumoto bdev_aio_rescan(const char *name)
1030d77d3179SShuhei Matsumoto {
1031d77d3179SShuhei Matsumoto 	struct spdk_bdev_desc *desc;
1032d77d3179SShuhei Matsumoto 	struct spdk_bdev *bdev;
1033b635d19aSshuochen0311 	struct file_disk *fdisk;
1034b635d19aSshuochen0311 	uint64_t disk_size, blockcnt;
1035b635d19aSshuochen0311 	int rc;
1036b635d19aSshuochen0311 
1037d77d3179SShuhei Matsumoto 	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
1038d77d3179SShuhei Matsumoto 	if (rc != 0) {
1039d77d3179SShuhei Matsumoto 		return rc;
1040d77d3179SShuhei Matsumoto 	}
1041d77d3179SShuhei Matsumoto 
1042d77d3179SShuhei Matsumoto 	bdev = spdk_bdev_desc_get_bdev(desc);
1043d77d3179SShuhei Matsumoto 	if (bdev->module != &aio_if) {
1044d77d3179SShuhei Matsumoto 		rc = -ENODEV;
1045d77d3179SShuhei Matsumoto 		goto exit;
1046b635d19aSshuochen0311 	}
1047b635d19aSshuochen0311 
1048b635d19aSshuochen0311 	fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk);
1049b635d19aSshuochen0311 	disk_size = spdk_fd_get_size(fdisk->fd);
1050d77d3179SShuhei Matsumoto 	blockcnt = disk_size / bdev->blocklen;
1051b635d19aSshuochen0311 
1052d77d3179SShuhei Matsumoto 	if (bdev->blockcnt != blockcnt) {
1053b635d19aSshuochen0311 		SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64 ", new block count %"
1054b635d19aSshuochen0311 			       PRIu64 "\n",
1055b635d19aSshuochen0311 			       fdisk->filename,
1056d77d3179SShuhei Matsumoto 			       bdev->blockcnt,
1057b635d19aSshuochen0311 			       blockcnt);
1058d77d3179SShuhei Matsumoto 		rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
1059b635d19aSshuochen0311 		if (rc != 0) {
1060b635d19aSshuochen0311 			SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n",
1061b635d19aSshuochen0311 				    fdisk->filename, rc);
1062d77d3179SShuhei Matsumoto 			goto exit;
1063b635d19aSshuochen0311 		}
1064b635d19aSshuochen0311 	}
1065b635d19aSshuochen0311 
1066d77d3179SShuhei Matsumoto exit:
1067d77d3179SShuhei Matsumoto 	spdk_bdev_close(desc);
1068d77d3179SShuhei Matsumoto 	return rc;
1069b635d19aSshuochen0311 }
1070b635d19aSshuochen0311 
107107fe6a43SSeth Howell struct delete_aio_bdev_ctx {
107207fe6a43SSeth Howell 	delete_aio_bdev_complete cb_fn;
107307fe6a43SSeth Howell 	void *cb_arg;
107407fe6a43SSeth Howell };
107507fe6a43SSeth Howell 
107607fe6a43SSeth Howell static void
107707fe6a43SSeth Howell aio_bdev_unregister_cb(void *arg, int bdeverrno)
107807fe6a43SSeth Howell {
107907fe6a43SSeth Howell 	struct delete_aio_bdev_ctx *ctx = arg;
108007fe6a43SSeth Howell 
108107fe6a43SSeth Howell 	ctx->cb_fn(ctx->cb_arg, bdeverrno);
108207fe6a43SSeth Howell 	free(ctx);
108307fe6a43SSeth Howell }
108407fe6a43SSeth Howell 
108507fe6a43SSeth Howell void
10864573e4ccSShuhei Matsumoto bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg)
108707fe6a43SSeth Howell {
108807fe6a43SSeth Howell 	struct delete_aio_bdev_ctx *ctx;
10894573e4ccSShuhei Matsumoto 	int rc;
109007fe6a43SSeth Howell 
109107fe6a43SSeth Howell 	ctx = calloc(1, sizeof(*ctx));
109207fe6a43SSeth Howell 	if (ctx == NULL) {
109307fe6a43SSeth Howell 		cb_fn(cb_arg, -ENOMEM);
109407fe6a43SSeth Howell 		return;
109507fe6a43SSeth Howell 	}
109607fe6a43SSeth Howell 
109707fe6a43SSeth Howell 	ctx->cb_fn = cb_fn;
109807fe6a43SSeth Howell 	ctx->cb_arg = cb_arg;
10994573e4ccSShuhei Matsumoto 	rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx);
11004573e4ccSShuhei Matsumoto 	if (rc != 0) {
11014573e4ccSShuhei Matsumoto 		aio_bdev_unregister_cb(ctx, rc);
11024573e4ccSShuhei Matsumoto 	}
110307fe6a43SSeth Howell }
110407fe6a43SSeth Howell 
110507fe6a43SSeth Howell static int
110607fe6a43SSeth Howell bdev_aio_initialize(void)
110707fe6a43SSeth Howell {
110807fe6a43SSeth Howell 	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
11095f14cec8STomasz Zawadzki 				sizeof(struct bdev_aio_group_channel), "aio_module");
111007fe6a43SSeth Howell 
111107fe6a43SSeth Howell 	return 0;
111207fe6a43SSeth Howell }
111307fe6a43SSeth Howell 
111407fe6a43SSeth Howell static void
111507fe6a43SSeth Howell bdev_aio_fini(void)
111607fe6a43SSeth Howell {
111707fe6a43SSeth Howell 	spdk_io_device_unregister(&aio_if, NULL);
111807fe6a43SSeth Howell }
111907fe6a43SSeth Howell 
11202172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(aio)
1121