/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Samsung Electronics Co., Ltd.
 * All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;
	bool			conserve_cpu;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);

static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}
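/*
 * Build and submit an NVMe command once the bdev layer has handed over an
 * aligned buffer. A command context is taken from the per-channel xNVMe
 * queue and filled in from the bdev_io; note that the NVMe NLB field is
 * zero-based, hence num_blocks - 1. Transient submission failures
 * (-EBUSY, -EAGAIN, -ENOMEM) are completed with SPDK_BDEV_IO_STATUS_NOMEM
 * so that the bdev layer queues and retries the I/O.
 */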
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type %d\n", bdev_io->type);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt : %d, nblks : %" PRIu64 " off : %#" PRIx64 "\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => queue the I/O in the bdev layer */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O in the bdev layer for the xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv: Submission failed with unexpected error: %d\n", err);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}
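/*
 * Entry point for I/O submission. Reads and writes go through
 * spdk_bdev_io_get_buf(), which invokes bdev_xnvme_get_buf_cb() immediately
 * when the request already carries buffers satisfying required_alignment,
 * and otherwise allocates an aligned bounce buffer first.
 */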
static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user specified unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}
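/*
 * Per-channel completion poller. xnvme_queue_poke() reaps completions from
 * the xNVMe queue (a max of 0 places no explicit cap on the number reaped
 * per call); each completion invokes bdev_xnvme_cmd_cb(). The poller reports
 * BUSY while commands remain outstanding so the reactor keeps polling.
 */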
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512; /* depth of the per-channel xNVMe queue */

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}
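/*
 * Create and register an xNVMe bdev. The io_mechanism string selects the
 * xNVMe async backend (e.g. "libaio", "io_uring", "io_uring_cmd"). Unless
 * conserve_cpu is requested, I/O completion polling (poll_io) or
 * submission-queue polling (poll_sq, for io_uring_cmd) is enabled,
 * trading CPU cycles for lower latency.
 */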
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
		  bool conserve_cpu)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	if (!conserve_cpu) {
		if (!strcmp(xnvme->io_mechanism, "libaio")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
			opts.poll_sq = 1;
		}
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %" PRIu64 ", block_size : %" PRIu32 "\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

void
delete_xnvme_bdev(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &xnvme_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)
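/*
 * Example (illustrative, values are placeholders): the configuration entry
 * emitted by bdev_xnvme_config_json() above, which can be replayed via the
 * bdev_xnvme_create RPC to recreate the bdev:
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": {
 *       "name": "xnvme0",
 *       "filename": "/dev/nvme0n1",
 *       "io_mechanism": "io_uring",
 *       "conserve_cpu": false
 *     }
 *   }
 */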