/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

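/* Per-I/O-channel context: one xNVMe queue plus the poller that reaps its completions. */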
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

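/* Per-bdev context: the xNVMe device handle together with the parameters used to create it. */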
struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;
	bool			conserve_cpu;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

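/* Emit one "bdev_xnvme_create" RPC entry per registered bdev so the running configuration
 * can be saved and replayed, e.g.:
 *   {"method": "bdev_xnvme_create", "params": {"name": ..., "filename": ...,
 *    "io_mechanism": ..., "conserve_cpu": ...}}
 */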
static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

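/* Reads and writes are always supported; write-zeroes and unmap additionally require
 * the io_uring_cmd mechanism and an NVM command-set namespace. */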
static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct bdev_xnvme *xnvme = ctx;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_UNMAP:
		/* libaio and io_uring only support read and write */
		return !strcmp(xnvme->io_mechanism, "io_uring_cmd") &&
		       xnvme_dev_get_csi(xnvme->dev) == XNVME_SPEC_CSI_NVM;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

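/* Called when the bdev is unregistered: tear down the io_device and free the context
 * from the unregister callback (bdev_xnvme_destruct_cb) once all channels are released. */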
static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

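/* Translate an unmap request into an NVMe Dataset Management (deallocate) command.
 * The LBA range is split into ranges of at most bdev.max_unmap blocks each; the range
 * descriptors are built in the buffer obtained via spdk_bdev_io_get_buf(). */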
static int
bdev_xnvme_unmap(struct spdk_bdev_io *bdev_io, struct xnvme_cmd_ctx *ctx, struct bdev_xnvme *xnvme)
{
	struct spdk_nvme_dsm_range *range;
	uint64_t offset, remaining;
	uint64_t num_ranges_u64, num_blocks, offset_blocks;
	uint16_t num_ranges;

	num_blocks = bdev_io->u.bdev.num_blocks;
	offset_blocks = bdev_io->u.bdev.offset_blocks;

	num_ranges_u64 = spdk_divide_round_up(num_blocks, xnvme->bdev.max_unmap);
	if (num_ranges_u64 > xnvme->bdev.max_unmap_segments) {
		SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
		return -EINVAL;
	}
	num_ranges = (uint16_t)num_ranges_u64;

	offset = offset_blocks;
	remaining = num_blocks;

	assert(bdev_io->u.bdev.iovcnt == 1);
	range = (struct spdk_nvme_dsm_range *) bdev_io->u.bdev.iovs->iov_base;

	/* Fill max-size ranges until the remaining blocks fit into one range */
	while (remaining > xnvme->bdev.max_unmap) {
		range->attributes.raw = 0;
		range->length = xnvme->bdev.max_unmap;
		range->starting_lba = offset;

		offset += xnvme->bdev.max_unmap;
		remaining -= xnvme->bdev.max_unmap;
		range++;
	}

	/* Final range describes the remaining blocks */
	range->attributes.raw = 0;
	range->length = remaining;
	range->starting_lba = offset;

	ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_DATASET_MANAGEMENT;
	ctx->cmd.common.nsid = xnvme->nsid;
	ctx->cmd.nvm.nlb = num_blocks - 1;
	ctx->cmd.nvm.slba = offset_blocks;
	ctx->cmd.dsm.nr = num_ranges - 1;
	ctx->cmd.dsm.ad = true;

	return 0;
}

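/* Fill in the NVMe command for this bdev_io and submit it on the channel's xNVMe queue;
 * completions are delivered asynchronously through bdev_xnvme_cmd_cb(). */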
static void
_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE_ZEROES;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		if (bdev_xnvme_unmap(bdev_io, ctx, xnvme)) {
			xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => queue the I/O in the bdev layer */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Start to queue I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command-context back in the queue */
	default:
		SPDK_ERRLOG("bdev_xnvme_cmd_passv : Submission failed: unexpected error\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, xnvme_queue_get_cmd_ctx(xnvme_ch->queue));
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	_xnvme_submit_request(ch, bdev_io);
}

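/* Entry point from the bdev layer: fetch an I/O buffer where one is required, then
 * build and submit the command. */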
static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		/* The max number of segments defined by spec is 256 and an
		 * spdk_nvme_dsm_range structure is 16 bytes */
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb, 256 * 16);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		_xnvme_submit_request(ch, bdev_io);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

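/* Per-command completion callback, invoked while reaping the queue from bdev_xnvme_poll(). */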
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O Failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command-context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

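/* Channel poller: reap any available completions without blocking and report busy
 * while commands are still outstanding on the queue. */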
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

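/* Per-channel setup: create an xNVMe queue with a fixed depth of 512, hook up the
 * completion callback and register the poller. */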
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

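/* Open the device through xNVMe with the requested I/O mechanism, size the bdev from
 * the device geometry, and register it with the bdev layer. */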
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
		  bool conserve_cpu)
{
	struct bdev_xnvme *xnvme;
	const struct xnvme_spec_nvm_idfy_ctrlr *ctrlr;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	xnvme->conserve_cpu = conserve_cpu;
	if (!xnvme->conserve_cpu) {
		if (!strcmp(xnvme->io_mechanism, "libaio")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
			opts.poll_io = 1;
		}
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;
	xnvme->bdev.max_write_zeroes = UINT16_MAX + 1;

	if (xnvme_dev_get_csi(xnvme->dev) == XNVME_SPEC_CSI_NVM) {
		ctrlr = (struct xnvme_spec_nvm_idfy_ctrlr *) xnvme_dev_get_ctrlr_css(xnvme->dev);
		xnvme->bdev.max_unmap = ctrlr->dmrsl ? ctrlr->dmrsl : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		xnvme->bdev.max_unmap_segments = ctrlr->dmrl ? ctrlr->dmrl :
						 SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES;
	}

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

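/* Unregister the named bdev; if unregistration cannot be started, the callback is
 * invoked directly with the error code. */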
void
delete_xnvme_bdev(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &xnvme_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)