/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bdev_aio.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

#include <libaio.h>

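/* Per-thread I/O channel for a single aio bdev: it owns the kernel
 * io_context_t used for submission and tracks how many I/Os are still
 * outstanding on this channel. */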
struct bdev_aio_io_channel {
	uint64_t				io_inflight;
	io_context_t				io_ctx;
	struct bdev_aio_group_channel		*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};

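/* Per-thread group channel shared by all aio bdevs on that thread: a single
 * poller walks the list of registered bdev_aio_io_channels and reaps their
 * completions. */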
struct bdev_aio_group_channel {
	struct spdk_poller			*poller;
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};

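/* Per-I/O context. It lives in each spdk_bdev_io's driver_ctx area (the size
 * is reported by bdev_aio_get_ctx_size()) and embeds the iocb handed to
 * io_submit(). */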
struct bdev_aio_task {
	struct iocb			iocb;
	uint64_t			len;
	struct bdev_aio_io_channel	*ch;
	TAILQ_ENTRY(bdev_aio_task)	link;
};

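/* Per-bdev context: the exported spdk_bdev plus the backing file descriptor
 * and reset bookkeeping. */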
struct file_disk {
	struct bdev_aio_task	*reset_task;
	struct spdk_poller	*reset_retry_timer;
	struct spdk_bdev	disk;
	char			*filename;
	int			fd;
	TAILQ_ENTRY(file_disk)  link;
	bool			block_size_override;
};

/* For user space reaping of completions */
struct spdk_aio_ring {
	uint32_t id;
	uint32_t size;
	uint32_t head;
	uint32_t tail;

	uint32_t version;
	uint32_t compat_features;
	uint32_t incompat_features;
	uint32_t header_length;
};

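/* This value corresponds to the magic the kernel stores in its AIO ring
 * header (AIO_RING_MAGIC in fs/aio.c); if the mapped ring does not report it,
 * completion reaping falls back to the io_getevents() syscall. */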
#define SPDK_AIO_RING_VERSION	0xa10a10a1

static int bdev_aio_initialize(void);
static void bdev_aio_fini(void);
static void aio_free_disk(struct file_disk *fdisk);
static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

static int
bdev_aio_get_ctx_size(void)
{
	return sizeof(struct bdev_aio_task);
}

static struct spdk_bdev_module aio_if = {
	.name		= "aio",
	.module_init	= bdev_aio_initialize,
	.module_fini	= bdev_aio_fini,
	.get_ctx_size	= bdev_aio_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)

static int
bdev_aio_open(struct file_disk *disk)
{
	int fd;

	fd = open(disk->filename, O_RDWR | O_DIRECT);
	if (fd < 0) {
		/* Try without O_DIRECT for non-disk files */
		fd = open(disk->filename, O_RDWR);
		if (fd < 0) {
			SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
				    disk->filename, errno, spdk_strerror(errno));
			disk->fd = -1;
			return -1;
		}
	}

	disk->fd = fd;

	return 0;
}

static int
bdev_aio_close(struct file_disk *disk)
{
	int rc;

	if (disk->fd == -1) {
		return 0;
	}

	rc = close(disk->fd);
	if (rc < 0) {
		SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
			    disk->fd, errno, spdk_strerror(errno));
		return -1;
	}

	disk->fd = -1;

	return 0;
}

static int64_t
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
	       struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu from off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return nbytes;
}

static int64_t
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "write %d iovs size %lu to off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
		return -1;
	}
	aio_ch->io_inflight++;
	return len;
}

static void
bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	int rc = fsync(fdisk->fd);

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
			      rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static int
bdev_aio_destruct(void *ctx)
{
	struct file_disk *fdisk = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
	rc = bdev_aio_close(fdisk);
	if (rc < 0) {
		SPDK_ERRLOG("bdev_aio_close() failed\n");
	}
	spdk_io_device_unregister(fdisk, NULL);
	aio_free_disk(fdisk);
	return rc;
}

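/* Reap completions directly from the completion ring that the kernel maps for
 * the io_context_t, avoiding an io_getevents() syscall on the fast path. If
 * the ring does not have the layout we expect, fall back to a non-blocking
 * io_getevents() call. */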
static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
	uint32_t head, tail, count;
	struct spdk_aio_ring *ring;
	struct timespec timeout;
	struct io_event *kevents;

	ring = (struct spdk_aio_ring *)io_ctx;

	if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
		timeout.tv_sec = 0;
		timeout.tv_nsec = 0;

		return io_getevents(io_ctx, 0, max, uevents, &timeout);
	}

	/* Read the current state out of the ring */
	head = ring->head;
	tail = ring->tail;

	/* This memory barrier is required to prevent the loads above
	 * from being re-ordered with stores to the events array
	 * potentially occurring on other threads. */
	spdk_smp_rmb();

	/* Calculate how many items are in the circular ring */
	count = tail - head;
	if (tail < head) {
		count += ring->size;
	}

	/* Reduce the count to the limit provided by the user */
	count = spdk_min(max, count);

	/* Grab the memory location of the event array */
	kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);

	/* Copy the events out of the ring. */
	if ((head + count) <= ring->size) {
		/* Only one copy is required */
		memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
	} else {
		uint32_t first_part = ring->size - head;
		/* Two copies are required */
		memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
		memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
	}

	/* Update the head pointer. On x86, stores will not be reordered with older loads,
	 * so the copies out of the event array will always be complete prior to this
	 * update becoming visible. On other architectures this is not guaranteed, so
	 * add a barrier. */
#if defined(__i386__) || defined(__x86_64__)
	spdk_compiler_barrier();
#else
	spdk_smp_mb();
#endif
	ring->head = (head + count) % ring->size;

	return count;
}

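/* Drain up to SPDK_AIO_QUEUE_DEPTH completions from one io channel and
 * complete the corresponding bdev_ios. A short transfer (res != len) is
 * treated as a failure. */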
static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i = 0;
	enum spdk_bdev_io_status status;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);

	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].data;
		if (events[i].res != aio_task->len) {
			status = SPDK_BDEV_IO_STATUS_FAILED;
		} else {
			status = SPDK_BDEV_IO_STATUS_SUCCESS;
		}

		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
		aio_task->ch->io_inflight--;
	}

	return nr;
}

static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

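/* Reset handling: iterate over all io channels of the bdev and check for
 * in-flight I/O. If any channel is still busy, re-arm a 500us retry poller and
 * check again; once everything has drained, complete the reset bdev_io. */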
static void
_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);

	if (aio_ch->io_inflight) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static int bdev_aio_reset_retry_timer(void *arg);

static void
_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
{
	struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);

	if (status == -1) {
		fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

static int
bdev_aio_reset_retry_timer(void *arg)
{
	struct file_disk *fdisk = arg;

	if (fdisk->reset_retry_timer) {
		spdk_poller_unregister(&fdisk->reset_retry_timer);
	}

	spdk_for_each_channel(fdisk,
			      _bdev_aio_get_io_inflight,
			      fdisk,
			      _bdev_aio_get_io_inflight_done);

	return SPDK_POLLER_BUSY;
}

static void
bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
{
	fdisk->reset_task = aio_task;

	bdev_aio_reset_retry_timer(fdisk);
}

static void
bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		    bool success)
{
	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
				ch,
				(struct bdev_aio_task *)bdev_io->driver_ctx,
				bdev_io->u.bdev.iovs,
				bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
				bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");
		break;
	}
}

static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;
	default:
		return -1;
	}
}

static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	default:
		return false;
	}
}

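/* I/O channel create/destroy callbacks: each channel gets its own kernel AIO
 * context sized to SPDK_AIO_QUEUE_DEPTH and is linked into the group channel
 * of the current thread so that bdev_aio_group_poll() reaps its completions. */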
static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}

static void
bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

static struct spdk_io_channel *
bdev_aio_get_io_channel(void *ctx)
{
	struct file_disk *fdisk = ctx;

	return spdk_get_io_channel(fdisk);
}

static int
bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = ctx;

	spdk_json_write_named_object_begin(w, "aio");

	spdk_json_write_named_string(w, "filename", fdisk->filename);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct file_disk *fdisk = bdev->ctxt;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_aio_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	if (fdisk->block_size_override) {
		spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table aio_fn_table = {
	.destruct		= bdev_aio_destruct,
	.submit_request		= bdev_aio_submit_request,
	.io_type_supported	= bdev_aio_io_type_supported,
	.get_io_channel		= bdev_aio_get_io_channel,
	.dump_info_json		= bdev_aio_dump_info_json,
	.write_config_json	= bdev_aio_write_json_config,
};

static void aio_free_disk(struct file_disk *fdisk)
{
	if (fdisk == NULL) {
		return;
	}
	free(fdisk->filename);
	free(fdisk->disk.name);
	free(fdisk);
}

static int
bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->io_ch_head);

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	return 0;
}

static void
bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("bdev aio group channel still has registered io channels\n");
	}

	spdk_poller_unregister(&ch->poller);
}

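/* Create an aio bdev on top of a regular file or block device. Passing
 * block_size == 0 uses the block size auto-detected from the backing file.
 *
 * A minimal usage sketch (e.g. from an RPC handler; the names are illustrative
 * and most error handling is omitted):
 *
 *	int rc = create_aio_bdev("aio0", "/dev/sdb", 0);
 *	if (rc != 0) {
 *		SPDK_ERRLOG("could not create aio bdev: %s\n", spdk_strerror(-rc));
 *	}
 */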
int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
		rc = -ENOMEM;
		goto error_return;
	}

	if (bdev_aio_open(fdisk)) {
		SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
		rc = -errno;
		goto error_return;
	}

	disk_size = spdk_fd_get_size(fdisk->fd);

	fdisk->disk.name = strdup(name);
	if (!fdisk->disk.name) {
		rc = -ENOMEM;
		goto error_return;
	}
	fdisk->disk.product_name = "AIO disk";
	fdisk->disk.module = &aio_if;

	fdisk->disk.write_cache = 1;

	detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
	if (block_size == 0) {
		/* User did not specify block size - use autodetected block size. */
		if (detected_block_size == 0) {
			SPDK_ERRLOG("Block size could not be auto-detected\n");
			rc = -EINVAL;
			goto error_return;
		}
		fdisk->block_size_override = false;
		block_size = detected_block_size;
	} else {
		if (block_size < detected_block_size) {
			SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
				    "auto-detected block size %" PRIu32 "\n",
				    block_size, detected_block_size);
			rc = -EINVAL;
			goto error_return;
		} else if (detected_block_size != 0 && block_size != detected_block_size) {
			SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
				     "auto-detected block size %" PRIu32 "\n",
				     block_size, detected_block_size);
		}
		fdisk->block_size_override = true;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blocklen = block_size;
	if (fdisk->block_size_override && detected_block_size) {
		fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
	} else {
		fdisk->disk.required_alignment = spdk_u32log2(block_size);
	}

	if (disk_size % fdisk->disk.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    disk_size, fdisk->disk.blocklen);
		rc = -EINVAL;
		goto error_return;
	}

	fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
	fdisk->disk.ctxt = fdisk;

	fdisk->disk.fn_table = &aio_fn_table;

	spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
				sizeof(struct bdev_aio_io_channel),
				fdisk->disk.name);
	rc = spdk_bdev_register(&fdisk->disk);
	if (rc) {
		spdk_io_device_unregister(fdisk, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
	return 0;

error_return:
	bdev_aio_close(fdisk);
	aio_free_disk(fdisk);
	return rc;
}

struct delete_aio_bdev_ctx {
	delete_aio_bdev_complete cb_fn;
	void *cb_arg;
};

static void
aio_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_aio_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

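/* Unregister a previously created aio bdev. cb_fn is always invoked: with
 * -ENODEV if bdev is not an aio bdev, with -ENOMEM if the completion context
 * cannot be allocated, otherwise with the status reported by the unregister
 * completion. */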
void
bdev_aio_delete(struct spdk_bdev *bdev, delete_aio_bdev_complete cb_fn, void *cb_arg)
{
	struct delete_aio_bdev_ctx *ctx;

	if (!bdev || bdev->module != &aio_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, ctx);
}

static int
bdev_aio_initialize(void)
{
	spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
				sizeof(struct bdev_aio_group_channel), "aio_module");

	return 0;
}

static void
bdev_aio_fini(void)
{
	spdk_io_device_unregister(&aio_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(aio)