/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Samsung Electronics Co., Ltd.
 * All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

struct bdev_xnvme_io_channel {
	struct xnvme_queue *queue;
	struct spdk_poller *poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev bdev;
	char *filename;
	char *io_mechanism;
	struct xnvme_dev *dev;
	uint32_t nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}
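/*
 * For reference, each object emitted by bdev_xnvme_config_json() above is a
 * replayable JSON-RPC call of the following shape (the values shown are
 * illustrative, not taken from a real device):
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": {
 *       "name": "xnvme0",
 *       "filename": "/dev/nvme0n1",
 *       "io_mechanism": "io_uring"
 *     }
 *   }
 */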
static struct spdk_bdev_module xnvme_if = {
	.name = "xnvme",
	.module_init = bdev_xnvme_init,
	.module_fini = bdev_xnvme_fini,
	.get_ctx_size = bdev_xnvme_get_ctx_size,
	.config_json = bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}
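/*
 * Submission path: invoked by spdk_bdev_io_get_buf() once an aligned buffer
 * is available. Translates the bdev I/O into an NVMe read/write command
 * (cmd.nvm.nlb is zero-based) and submits it, vectored, through
 * xnvme_cmd_passv(); completion is reported asynchronously via
 * bdev_xnvme_cmd_cb().
 */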
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	/* No free command-contexts (queue exhausted); ask the bdev layer to retry. */
	if (spdk_unlikely(!ctx)) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;
	}

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type %d\n", bdev_io->type);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => queue the I/O in the bdev layer */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev: submission queue is full\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command-context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv: submission failed with unexpected error: %d\n", err);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user specified unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct = bdev_xnvme_destruct,
	.submit_request = bdev_xnvme_submit_request,
	.io_type_supported = bdev_xnvme_io_type_supported,
	.get_io_channel = bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command-context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}
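/*
 * Each SPDK I/O channel gets its own xNVMe queue (fixed depth of 512 here)
 * and a poller that reaps completions: xnvme_queue_poke() drives
 * bdev_xnvme_cmd_cb() for every completed command.
 */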
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}
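/*
 * RPC entry point (bdev_xnvme_create): opens the device through xNVMe with
 * the requested io_mechanism as the async backend, validates the reported
 * geometry, and registers the resulting bdev with the SPDK bdev layer.
 */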
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
		opts.poll_sq = 1;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

struct delete_xnvme_bdev_ctx {
	struct bdev_xnvme *xnvme;
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	bdev_xnvme_destruct(ctx->xnvme);
	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;
	struct bdev_xnvme *xnvme;

	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	/* Dereference the bdev only after the NULL/module check above. */
	xnvme = (struct bdev_xnvme *)bdev->ctxt;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->xnvme = xnvme;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)
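/*
 * Minimal usage sketch for the exported API (hypothetical caller, e.g. an
 * RPC handler; the callback name below is an assumption, not part of this
 * module):
 *
 *   static void
 *   on_delete_done(void *cb_arg, int bdeverrno)
 *   {
 *           SPDK_NOTICELOG("delete completed: %d\n", bdeverrno);
 *   }
 *
 *   struct spdk_bdev *bdev = create_xnvme_bdev("xnvme0", "/dev/nvme0n1", "io_uring");
 *   ...
 *   delete_xnvme_bdev(bdev, on_delete_done, NULL);
 */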