/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Samsung Electronics Co., Ltd.
 * All rights reserved.
 */

#include "libxnvme.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

struct bdev_xnvme_io_channel {
	struct xnvme_queue *queue;
	struct spdk_poller *poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev bdev;
	char *filename;
	char *io_mechanism;
	struct xnvme_dev *dev;
	uint32_t nsid;
	bool conserve_cpu;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}
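/*
 * For reference, each entry emitted by bdev_xnvme_config_json() above is a
 * complete "bdev_xnvme_create" RPC call. With hypothetical example values
 * (the bdev name and device path below are illustrative only), one entry
 * looks roughly like:
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": {
 *       "name": "xnvme0",
 *       "filename": "/dev/ng0n1",
 *       "io_mechanism": "io_uring_cmd",
 *       "conserve_cpu": false
 *     }
 *   }
 */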
static struct spdk_bdev_module xnvme_if = {
	.name = "xnvme",
	.module_init = bdev_xnvme_init,
	.module_fini = bdev_xnvme_fini,
	.get_ctx_size = bdev_xnvme_get_ctx_size,
	.config_json = bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct bdev_xnvme *xnvme = ctx;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_UNMAP:
		/* libaio and io_uring only support read and write */
		return !strcmp(xnvme->io_mechanism, "io_uring_cmd") &&
		       xnvme_dev_get_csi(xnvme->dev) == XNVME_SPEC_CSI_NVM;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

static int
bdev_xnvme_unmap(struct spdk_bdev_io *bdev_io, struct xnvme_cmd_ctx *ctx, struct bdev_xnvme *xnvme)
{
	struct spdk_nvme_dsm_range *range;
	uint64_t offset, remaining;
	uint64_t num_ranges_u64, num_blocks, offset_blocks;
	uint16_t num_ranges;

	num_blocks = bdev_io->u.bdev.num_blocks;
	offset_blocks = bdev_io->u.bdev.offset_blocks;

	num_ranges_u64 = spdk_divide_round_up(num_blocks, xnvme->bdev.max_unmap);
	if (num_ranges_u64 > xnvme->bdev.max_unmap_segments) {
		SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
		return -EINVAL;
	}
	num_ranges = (uint16_t)num_ranges_u64;

	offset = offset_blocks;
	remaining = num_blocks;

	assert(bdev_io->u.bdev.iovcnt == 1);
	range = (struct spdk_nvme_dsm_range *) bdev_io->u.bdev.iovs->iov_base;

	/* Fill max-size ranges until the remaining blocks fit into one range */
	while (remaining > xnvme->bdev.max_unmap) {
		range->attributes.raw = 0;
		range->length = xnvme->bdev.max_unmap;
		range->starting_lba = offset;

		offset += xnvme->bdev.max_unmap;
		remaining -= xnvme->bdev.max_unmap;
		range++;
	}

	/* Final range describes the remaining blocks */
	range->attributes.raw = 0;
	range->length = remaining;
	range->starting_lba = offset;

	ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_DATASET_MANAGEMENT;
	ctx->cmd.common.nsid = xnvme->nsid;
	ctx->cmd.nvm.nlb = num_blocks - 1;
	ctx->cmd.nvm.slba = offset_blocks;
	ctx->cmd.dsm.nr = num_ranges - 1;
	ctx->cmd.dsm.ad = true;

	return 0;
}
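/*
 * Worked example for bdev_xnvme_unmap() above, using hypothetical numbers:
 * assume max_unmap = 2048 blocks and an unmap request of 5000 blocks starting
 * at LBA 0. spdk_divide_round_up(5000, 2048) = 3 ranges, so the loop emits
 * {starting_lba 0, length 2048} and {starting_lba 2048, length 2048}, and the
 * final range is {starting_lba 4096, length 904}. The resulting DSM command
 * then carries nr = 3 - 1 = 2 (zero-based range count) and ad = true
 * (deallocate).
 */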
static void
_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE_ZEROES;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		if (bdev_xnvme_unmap(bdev_io, ctx, xnvme)) {
			xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
		break;
	default:
		SPDK_ERRLOG("Wrong io type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => Queue the I/O in bdev layer */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Start to queue I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command-context back in the queue */
	default:
		SPDK_ERRLOG("bdev_xnvme_cmd_passv : Submission failed: unexpected error\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}
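/*
 * Example of the translation done in _xnvme_submit_request() above, with
 * hypothetical values: a READ bdev_io of num_blocks = 8 at offset_blocks = 100
 * becomes an NVMe read with slba = 100 and nlb = 7 (NVMe encodes the block
 * count as zero-based), and the payload length handed to xnvme_cmd_passv() is
 * 8 * blocklen bytes spread across the supplied iovecs.
 */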
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, xnvme_queue_get_cmd_ctx(xnvme_ch->queue));
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	_xnvme_submit_request(ch, bdev_io);
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user supplied unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		/* The max number of segments defined by the spec is 256 and an
		 * spdk_nvme_dsm_range structure is 16 bytes */
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb, 256 * 16);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		_xnvme_submit_request(ch, bdev_io);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct = bdev_xnvme_destruct,
	.submit_request = bdev_xnvme_submit_request,
	.io_type_supported = bdev_xnvme_io_type_supported,
	.get_io_channel = bdev_xnvme_get_io_channel,
};
static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O Failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: Put the command-context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}
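/*
 * Completion flow: the per-channel poller below calls xnvme_queue_poke(),
 * which reaps finished commands and invokes bdev_xnvme_cmd_cb() once per
 * completion. The callback maps the xNVMe completion status to a bdev I/O
 * status, completes the bdev_io, and returns the command context to the
 * queue for reuse.
 */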
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
		  bool conserve_cpu)
{
	struct bdev_xnvme *xnvme;
	const struct xnvme_spec_nvm_idfy_ctrlr *ctrlr;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	xnvme->conserve_cpu = conserve_cpu;
	if (!xnvme->conserve_cpu) {
		if (!strcmp(xnvme->io_mechanism, "libaio")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
			opts.poll_io = 1;
		}
	}
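	/*
	 * Note: when conserve_cpu is false, the xNVMe backend is asked to poll
	 * for I/O completions (opts.poll_io), which typically trades CPU cycles
	 * for lower latency; when conserve_cpu is set, the backend keeps its
	 * default completion mode.
	 */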
	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;
	xnvme->bdev.max_write_zeroes = UINT16_MAX + 1;

	if (xnvme_dev_get_csi(xnvme->dev) == XNVME_SPEC_CSI_NVM) {
		ctrlr = (struct xnvme_spec_nvm_idfy_ctrlr *) xnvme_dev_get_ctrlr_css(xnvme->dev);
		xnvme->bdev.max_unmap = ctrlr->dmrsl ? ctrlr->dmrsl : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		xnvme->bdev.max_unmap_segments = ctrlr->dmrl ? ctrlr->dmrl :
						 SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES;
	}

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

void
delete_xnvme_bdev(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &xnvme_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}
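/*
 * Illustrative usage of the two entry points above from a hypothetical
 * caller (for example the RPC layer); the bdev name, the device path and the
 * callback are example values only, not part of this module:
 *
 *   struct spdk_bdev *bdev;
 *
 *   bdev = create_xnvme_bdev("xnvme0", "/dev/ng0n1", "io_uring_cmd", false);
 *   if (bdev == NULL) {
 *           // handle creation failure
 *   }
 *
 *   // ... later, unregister_done_cb being a caller-provided
 *   // spdk_bdev_unregister_cb ...
 *   delete_xnvme_bdev("xnvme0", unregister_done_cb, cb_arg);
 */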
static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)
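/*
 * SPDK_LOG_REGISTER_COMPONENT(xnvme) registers the "xnvme" log flag used by
 * the SPDK_DEBUGLOG() calls in this file. In a debug build the flag can be
 * enabled at runtime through SPDK's generic logging controls, e.g. the
 * application's "-L xnvme" option or the "log_set_flag" RPC (this is standard
 * SPDK behavior, not specific to this module).
 */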