/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Samsung Electronics Co., Ltd.
 * All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);

static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx;
	int err;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	if (spdk_unlikely(!ctx)) {
		/* xnvme_queue_get_cmd_ctx() returns NULL when the queue has no
		 * free command-contexts; have the bdev layer retry later.
		 */
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;
	}
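
	/* Buffer allocation succeeded: translate the bdev_io into an NVMe
	 * command. NVMe's NLB field is zero-based, hence the "num_blocks - 1"
	 * below.
	 */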
	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, xnvme_task);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type: %d\n", bdev_io->type);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt : %d, nblks : %lu, off : %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => have the bdev layer queue the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev; submission queue is full\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error; put the command-context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv() failed with unexpected error: %d\n", err);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}
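
/*
 * I/O path overview: the bdev layer calls bdev_xnvme_submit_request(), which
 * routes READ/WRITE through spdk_bdev_io_get_buf() so a buffer aligned to
 * bdev->required_alignment is guaranteed before bdev_xnvme_get_buf_cb() above
 * submits the command via xnvme_cmd_passv(). Completions are reaped by the
 * per-channel poller (bdev_xnvme_poll) and finished in bdev_xnvme_cmd_cb().
 */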
static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user supplied unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf.
	 */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command-context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}
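
/*
 * Per-channel completion poller, registered in bdev_xnvme_queue_create_cb().
 * xnvme_queue_poke() reaps available completions, invoking bdev_xnvme_cmd_cb()
 * for each completed command; the poller reports BUSY while commands remain
 * outstanding so the reactor keeps polling this channel.
 */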
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512; /* per-channel xNVMe queue depth */

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}
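
/*
 * Construct and register an xNVMe bdev. This is typically driven by the
 * "bdev_xnvme_create" RPC; the JSON emitted by bdev_xnvme_config_json() above
 * shows the expected parameters, roughly (values illustrative):
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": { "name": "xnvme0", "filename": "/dev/nvme0n1",
 *                 "io_mechanism": "io_uring" }
 *   }
 */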
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
		opts.poll_sq = 1;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512)\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2)\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %u\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

struct delete_xnvme_bdev_ctx {
	struct bdev_xnvme *xnvme;
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	/* The bdev layer already invoked bdev_xnvme_destruct() through the
	 * fn_table as part of spdk_bdev_unregister(); calling it again here
	 * would unregister the io_device twice. Only report completion and
	 * release the context.
	 */
	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;

	/* Validate the bdev before dereferencing bdev->ctxt. */
	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->xnvme = (struct bdev_xnvme *)bdev->ctxt;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)
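
/*
 * The "xnvme" log component registered above gates the SPDK_DEBUGLOG() calls
 * in this file. In debug builds it can be enabled at runtime, e.g. with an
 * SPDK application's "-L xnvme" option or the "log_set_flag" RPC.
 */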