/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "bdev_nvme.h"
#include "bdev_ocssd.h"

#include "spdk/config.h"
#include "spdk/conf.h"
#include "spdk/endian.h"
#include "spdk/bdev.h"
#include "spdk/json.h"
#include "spdk/nvme.h"
#include "spdk/nvme_ocssd.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/util.h"

#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"

#define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true

static void bdev_nvme_get_spdk_running_config(FILE *fp);
static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);

struct nvme_bdev_io {
	/** array of iovecs to transfer. */
	struct iovec *iovs;

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/** array of iovecs to transfer. */
	struct iovec *fused_iovs;

	/** Number of iovecs in iovs array. */
	int fused_iovcnt;

	/** Current iovec position. */
	int fused_iovpos;

	/** Offset in current iovec. */
	uint32_t fused_iov_offset;

	/** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
	struct spdk_nvme_cpl cpl;

	/** Originating thread */
	struct spdk_thread *orig_thread;

	/** Keeps track if first of fused commands was submitted */
	bool first_fused_submitted;
};

struct nvme_probe_ctx {
	size_t count;
	struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
	struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS];
	const char *names[NVME_MAX_CONTROLLERS];
	uint32_t prchk_flags[NVME_MAX_CONTROLLERS];
	const char *hostnqn;
};

struct nvme_probe_skip_entry {
	struct spdk_nvme_transport_id trid;
	TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
};
/* All the controllers deleted by users via RPC are skipped by hotplug monitor */
static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
			g_skipped_nvme_ctrlrs);

static struct spdk_bdev_nvme_opts g_opts = {
	.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
	.timeout_us = 0,
	.retry_count = 4,
	.arbitration_burst = 0,
	.low_priority_weight = 0,
	.medium_priority_weight = 0,
	.high_priority_weight = 0,
	.nvme_adminq_poll_period_us = 10000ULL,
	.nvme_ioq_poll_period_us = 0,
	.io_queue_requests = 0,
	.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
};

#define NVME_HOTPLUG_POLL_PERIOD_MAX		10000000ULL
#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT	100000ULL

static int g_hot_insert_nvme_controller_index = 0;
static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
static bool g_nvme_hotplug_enabled = false;
static struct spdk_thread *g_bdev_nvme_init_thread;
static struct spdk_poller *g_hotplug_poller;
static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
static char *g_nvme_hostnqn = NULL;

static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
		struct nvme_async_probe_ctx *ctx);
static void nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx);
static int bdev_nvme_library_init(void);
static void bdev_nvme_library_fini(void);
static int bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			   struct nvme_bdev_io *bio,
			   struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				 struct nvme_bdev_io *bio,
				 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			    struct nvme_bdev_io *bio,
			    struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			      struct nvme_bdev_io *bio,
			      struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
		int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				    struct nvme_bdev_io *bio,
				    struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				 struct nvme_bdev_io *bio,
				 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				    struct nvme_bdev_io *bio,
				    struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio);

typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
				      struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx);
static void nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
		struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx);

static populate_namespace_fn g_populate_namespace_fn[] = {
	NULL,
	nvme_ctrlr_populate_standard_namespace,
	bdev_ocssd_populate_namespace,
};

typedef void (*depopulate_namespace_fn)(struct nvme_bdev_ns *ns);
static void nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns);

static depopulate_namespace_fn g_depopulate_namespace_fn[] = {
	NULL,
	nvme_ctrlr_depopulate_standard_namespace,
	bdev_ocssd_depopulate_namespace,
};

typedef void (*config_json_namespace_fn)(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns);
static void nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w,
		struct nvme_bdev_ns *ns);

static config_json_namespace_fn g_config_json_namespace_fn[] = {
	NULL,
	nvme_ctrlr_config_json_standard_namespace,
	bdev_ocssd_namespace_config_json,
};

struct spdk_nvme_qpair *
spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
{
	struct nvme_io_channel *nvme_ch;

	nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);

	return nvme_ch->qpair;
}

static int
bdev_nvme_get_ctx_size(void)
{
	return sizeof(struct nvme_bdev_io);
}

static struct spdk_bdev_module nvme_if = {
	.name = "nvme",
	.async_fini = true,
	.module_init = bdev_nvme_library_init,
	.module_fini = bdev_nvme_library_fini,
	.config_text = bdev_nvme_get_spdk_running_config,
	.config_json = bdev_nvme_config_json,
	.get_ctx_size = bdev_nvme_get_ctx_size,

};
SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
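
/*
 * Per-channel I/O queue pair poller: reaps completions on the channel's qpair.
 * When spin-stat collection is enabled (only in builds with SPDK_CONFIG_VTUNE),
 * idle polling time is accumulated on the channel so it can later be reported
 * through the get_spin_time callback.
 */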
static int
bdev_nvme_poll(void *arg)
{
	struct nvme_io_channel *ch = arg;
	int32_t num_completions;

	if (ch->qpair == NULL) {
		return -1;
	}

	if (ch->collect_spin_stat && ch->start_ticks == 0) {
		ch->start_ticks = spdk_get_ticks();
	}

	num_completions = spdk_nvme_qpair_process_completions(ch->qpair, 0);

	if (ch->collect_spin_stat) {
		if (num_completions > 0) {
			if (ch->end_ticks != 0) {
				ch->spin_ticks += (ch->end_ticks - ch->start_ticks);
				ch->end_ticks = 0;
			}
			ch->start_ticks = 0;
		} else {
			ch->end_ticks = spdk_get_ticks();
		}
	}

	return num_completions;
}

static int
bdev_nvme_poll_adminq(void *arg)
{
	int32_t rc;
	struct spdk_nvme_ctrlr *ctrlr = arg;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;

	rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);

	if (rc < 0) {
		nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
		assert(nvme_bdev_ctrlr != NULL);
		bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
	}

	return rc;
}

static int
bdev_nvme_destruct(void *ctx)
{
	struct nvme_bdev *nvme_disk = ctx;

	nvme_bdev_detach_bdev_from_ns(nvme_disk);

	free(nvme_disk->disk.name);
	free(nvme_disk);

	return 0;
}

static int
bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio,
		uint64_t offset, uint64_t nbytes)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

static void
_bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_bdev_io *bdev_io;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	/* A NULL ctx means success. */
	if (spdk_io_channel_iter_get_ctx(i) != NULL) {
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	while (!TAILQ_EMPTY(&nvme_ch->pending_resets)) {
		bdev_io = TAILQ_FIRST(&nvme_ch->pending_resets);
		TAILQ_REMOVE(&nvme_ch->pending_resets, bdev_io, module_link);
		spdk_bdev_io_complete(bdev_io, status);
	}

	spdk_for_each_channel_continue(i, 0);
}

static void
_bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc)
{
	/* we are using the for_each_channel cb_arg like a return code here. */
	/* If it's zero, we succeeded, otherwise, the reset failed. */
	void *cb_arg = NULL;

	if (rc) {
		cb_arg = (void *)0x1;
		SPDK_ERRLOG("Resetting controller failed.\n");
	} else {
		SPDK_NOTICELOG("Resetting controller successful.\n");
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	nvme_bdev_ctrlr->resetting = false;
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	/* Make sure we clear any pending resets before returning. */
	spdk_for_each_channel(nvme_bdev_ctrlr,
			      _bdev_nvme_complete_pending_resets,
			      cb_arg, NULL);
}
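
/*
 * Controller reset sequence: bdev_nvme_reset() first destroys the I/O qpair on
 * every channel, then resets the controller, recreates the qpairs, and finally
 * completes any resets that were queued while one was already in progress.
 */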
static void
_bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
	void *ctx = spdk_io_channel_iter_get_ctx(i);
	int rc = SPDK_BDEV_IO_STATUS_SUCCESS;

	if (status) {
		rc = SPDK_BDEV_IO_STATUS_FAILED;
	}
	if (ctx) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc);
	}
	_bdev_nvme_reset_complete(nvme_bdev_ctrlr, status);
}

static void
_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_nvme_io_qpair_opts opts;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
	opts.delay_cmd_submit = g_opts.delay_cmd_submit;

	nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
	if (!nvme_ch->qpair) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static void
_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
	struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
	int rc;

	if (status) {
		if (bio) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
		}
		_bdev_nvme_reset_complete(nvme_bdev_ctrlr, status);
		return;
	}

	rc = spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr);
	if (rc != 0) {
		if (bio) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
		}
		_bdev_nvme_reset_complete(nvme_bdev_ctrlr, rc);
		return;
	}

	/* Recreate all of the I/O queue pairs */
	spdk_for_each_channel(nvme_bdev_ctrlr,
			      _bdev_nvme_reset_create_qpair,
			      bio,
			      _bdev_nvme_reset_create_qpairs_done);
}

static void
_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
	if (!rc) {
		nvme_ch->qpair = NULL;
	}

	spdk_for_each_channel_continue(i, rc);
}

static int
bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio)
{
	struct spdk_io_channel *ch;
	struct nvme_io_channel *nvme_ch;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	if (nvme_bdev_ctrlr->destruct) {
		/* Don't bother resetting if the controller is in the process of being destructed. */
		if (bio) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
		}
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		return 0;
	}

	if (!nvme_bdev_ctrlr->resetting) {
		nvme_bdev_ctrlr->resetting = true;
	} else {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
		/*
		 * The internal reset calls won't be queued. This is on purpose so that we don't
		 * interfere with the app framework reset strategy. i.e. we are deferring to the
		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
		 */
		if (bio) {
			ch = spdk_get_io_channel(nvme_bdev_ctrlr);
			assert(ch != NULL);
			nvme_ch = spdk_io_channel_get_ctx(ch);
			TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link);
			spdk_put_io_channel(ch);
		}
		return 0;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	/* First, delete all NVMe I/O queue pairs. */
	spdk_for_each_channel(nvme_bdev_ctrlr,
			      _bdev_nvme_reset_destroy_qpair,
			      bio,
			      _bdev_nvme_reset);

	return 0;
}

static int
bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		struct nvme_bdev_io *bio,
		uint64_t offset_blocks,
		uint64_t num_blocks);

static void
bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		     bool success)
{
	int ret;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
			      ch,
			      (struct nvme_bdev_io *)bdev_io->driver_ctx,
			      bdev_io->u.bdev.iovs,
			      bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.md_buf,
			      bdev_io->u.bdev.num_blocks,
			      bdev_io->u.bdev.offset_blocks);

	if (spdk_likely(ret == 0)) {
		return;
	} else if (ret == -ENOMEM) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
	} else {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static int
_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;

	if (nvme_ch->qpair == NULL) {
		/* The device is currently resetting */
		return -1;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;

	case SPDK_BDEV_IO_TYPE_WRITE:
		return bdev_nvme_writev(nbdev,
					ch,
					nbdev_io,
					bdev_io->u.bdev.iovs,
					bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.md_buf,
					bdev_io->u.bdev.num_blocks,
					bdev_io->u.bdev.offset_blocks);

	case SPDK_BDEV_IO_TYPE_COMPARE:
		return bdev_nvme_comparev(nbdev,
					  ch,
					  nbdev_io,
					  bdev_io->u.bdev.iovs,
					  bdev_io->u.bdev.iovcnt,
					  bdev_io->u.bdev.md_buf,
					  bdev_io->u.bdev.num_blocks,
					  bdev_io->u.bdev.offset_blocks);

	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		return bdev_nvme_comparev_and_writev(nbdev,
						     ch,
						     nbdev_io,
						     bdev_io->u.bdev.iovs,
						     bdev_io->u.bdev.iovcnt,
						     bdev_io->u.bdev.fused_iovs,
						     bdev_io->u.bdev.fused_iovcnt,
						     bdev_io->u.bdev.md_buf,
						     bdev_io->u.bdev.num_blocks,
						     bdev_io->u.bdev.offset_blocks);

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		return bdev_nvme_unmap(nbdev,
				       ch,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return bdev_nvme_unmap(nbdev,
				       ch,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_RESET:
		return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io);

	case SPDK_BDEV_IO_TYPE_FLUSH:
		return bdev_nvme_flush(nbdev,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
		return bdev_nvme_admin_passthru(nbdev,
						ch,
						nbdev_io,
						&bdev_io->u.nvme_passthru.cmd,
						bdev_io->u.nvme_passthru.buf,
						bdev_io->u.nvme_passthru.nbytes);

	case SPDK_BDEV_IO_TYPE_NVME_IO:
		return bdev_nvme_io_passthru(nbdev,
					     ch,
					     nbdev_io,
					     &bdev_io->u.nvme_passthru.cmd,
					     bdev_io->u.nvme_passthru.buf,
					     bdev_io->u.nvme_passthru.nbytes);

	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return bdev_nvme_io_passthru_md(nbdev,
						ch,
						nbdev_io,
						&bdev_io->u.nvme_passthru.cmd,
						bdev_io->u.nvme_passthru.buf,
						bdev_io->u.nvme_passthru.nbytes,
						bdev_io->u.nvme_passthru.md_buf,
						bdev_io->u.nvme_passthru.md_len);

	default:
		return -EINVAL;
	}
	return 0;
}

static void
bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	int rc = _bdev_nvme_submit_request(ch, bdev_io);

	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static bool
bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct nvme_bdev *nbdev = ctx;
	const struct spdk_nvme_ctrlr_data *cdata;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
		return true;

	case SPDK_BDEV_IO_TYPE_COMPARE:
		return spdk_nvme_ns_supports_compare(nbdev->nvme_ns->ns);

	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns) ? true : false;

	case SPDK_BDEV_IO_TYPE_UNMAP:
		cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
		return cdata->oncs.dsm;

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
		/*
		 * If an NVMe controller guarantees reading unallocated blocks returns zero,
		 * we can implement WRITE_ZEROES as an NVMe deallocate command.
		 */
		if (cdata->oncs.dsm &&
		    spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->nvme_ns->ns) ==
		    SPDK_NVME_DEALLOC_READ_00) {
			return true;
		}
		/*
		 * The NVMe controller write_zeroes function is currently not used by our driver.
		 * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail.
		 * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read.
		 */
		return false;

	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		if (spdk_nvme_ctrlr_get_flags(nbdev->nvme_bdev_ctrlr->ctrlr) &
		    SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
			return true;
		}
		return false;

	default:
		return false;
	}
}
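
/*
 * I/O channel create/destroy callbacks: each channel gets its own I/O qpair on
 * the controller plus a completion poller, so every SPDK thread submits I/O on
 * its own queue pair. OCSSD controllers also get an OCSSD channel context.
 */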
static int
bdev_nvme_create_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device;
	struct nvme_io_channel *ch = ctx_buf;
	struct spdk_nvme_io_qpair_opts opts;

#ifdef SPDK_CONFIG_VTUNE
	ch->collect_spin_stat = true;
#else
	ch->collect_spin_stat = false;
#endif

	spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
	opts.delay_cmd_submit = g_opts.delay_cmd_submit;
	opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
	g_opts.io_queue_requests = opts.io_queue_requests;

	ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));

	if (ch->qpair == NULL) {
		return -1;
	}

	if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
		if (bdev_ocssd_create_io_channel(ch)) {
			spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
			return -1;
		}
	}

	ch->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, ch, g_opts.nvme_ioq_poll_period_us);

	TAILQ_INIT(&ch->pending_resets);
	return 0;
}

static void
bdev_nvme_destroy_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device;
	struct nvme_io_channel *ch = ctx_buf;

	if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
		bdev_ocssd_destroy_io_channel(ch);
	}

	spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
	spdk_poller_unregister(&ch->poller);
}

static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	struct nvme_bdev *nvme_bdev = ctx;

	return spdk_get_io_channel(nvme_bdev->nvme_bdev_ctrlr);
}

static int
bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct nvme_bdev *nvme_bdev = ctx;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	struct spdk_nvme_ns *ns;
	union spdk_nvme_vs_register vs;
	union spdk_nvme_csts_register csts;
	char buf[128];

	cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	ns = nvme_bdev->nvme_ns->ns;

	spdk_json_write_named_object_begin(w, "nvme");

	if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->trid.traddr);
	}

	spdk_json_write_named_object_begin(w, "trid");

	nvme_bdev_dump_trid_json(&nvme_bdev_ctrlr->trid, w);

	spdk_json_write_object_end(w);

#ifdef SPDK_CONFIG_NVME_CUSE
	char *cuse_device;

	cuse_device = spdk_nvme_cuse_get_ns_name(nvme_bdev->nvme_bdev_ctrlr->ctrlr,
			spdk_nvme_ns_get_id(ns));
	if (cuse_device) {
		spdk_json_write_named_string(w, "cuse_device", cuse_device);
	}
#endif

	spdk_json_write_named_object_begin(w, "ctrlr_data");

	spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);

	snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "model_number", buf);

	snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "serial_number", buf);

	snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "firmware_revision", buf);

	spdk_json_write_named_object_begin(w, "oacs");

	spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
	spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
	spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
	spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "vs");

	spdk_json_write_name(w, "nvme_version");
	if (vs.bits.ter) {
		spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
	} else {
		spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "csts");

	spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy);
	spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "ns_data");

	spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));

	spdk_json_write_object_end(w);

	if (cdata->oacs.security) {
		spdk_json_write_named_object_begin(w, "security");

		spdk_json_write_named_bool(w, "opal", spdk_opal_supported(nvme_bdev_ctrlr->opal_dev));

		spdk_json_write_object_end(w);
	}

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}

static uint64_t
bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	uint64_t spin_time;

	if (!nvme_ch->collect_spin_stat) {
		return 0;
	}

	if (nvme_ch->end_ticks != 0) {
		nvme_ch->spin_ticks += (nvme_ch->end_ticks - nvme_ch->start_ticks);
		nvme_ch->end_ticks = 0;
	}

	spin_time = (nvme_ch->spin_ticks * 1000000ULL) / spdk_get_ticks_hz();
	nvme_ch->start_ticks = 0;
	nvme_ch->spin_ticks = 0;

	return spin_time;
}

static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	.destruct = bdev_nvme_destruct,
	.submit_request = bdev_nvme_submit_request,
	.io_type_supported = bdev_nvme_io_type_supported,
	.get_io_channel = bdev_nvme_get_io_channel,
	.dump_info_json = bdev_nvme_dump_info_json,
	.write_config_json = bdev_nvme_write_config_json,
	.get_spin_time = bdev_nvme_get_spin_time,
};

static void
nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
				       struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
	struct nvme_bdev *bdev;
	struct spdk_nvme_ns *ns;
	const struct spdk_uuid *uuid;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_ns_data *nsdata;
	int rc;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
	if (!ns) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nvme_ns->id);
		nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -EINVAL);
		return;
	}

	bdev = calloc(1, sizeof(*bdev));
	if (!bdev) {
		SPDK_ERRLOG("bdev calloc() failed\n");
		nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM);
		return;
	}

	bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr;
	nvme_ns->ns = ns;
	bdev->nvme_ns = nvme_ns;

	bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns));
	if (!bdev->disk.name) {
		free(bdev);
		nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM);
		return;
	}
	bdev->disk.product_name = "NVMe disk";

	bdev->disk.write_cache = 0;
	if (cdata->vwc.present) {
		/* Enable if the Volatile Write Cache exists */
		bdev->disk.write_cache = 1;
	}
	bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
	bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
	bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);

	uuid = spdk_nvme_ns_get_uuid(ns);
	if (uuid != NULL) {
		bdev->disk.uuid = *uuid;
	}

	nsdata = spdk_nvme_ns_get_data(ns);

	bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns);
	if (bdev->disk.md_len != 0) {
		bdev->disk.md_interleave = nsdata->flbas.extended;
		bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
		if (bdev->disk.dif_type != SPDK_DIF_DISABLE) {
			bdev->disk.dif_is_head_of_md = nsdata->dps.md_start;
			bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags;
		}
	}

	if (!bdev_nvme_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) {
		bdev->disk.acwu = 0;
	} else if (nsdata->nsfeat.ns_atomic_write_unit) {
		bdev->disk.acwu = nsdata->nacwu;
	} else {
		bdev->disk.acwu = cdata->acwu;
	}

	bdev->disk.ctxt = bdev;
	bdev->disk.fn_table = &nvmelib_fn_table;
	bdev->disk.module = &nvme_if;
	rc = spdk_bdev_register(&bdev->disk);
	if (rc) {
		free(bdev->disk.name);
		free(bdev);
		nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, rc);
		return;
	}

	nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev);
	nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0);
}

static bool
hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_skip_entry *entry;

	TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
			return false;
		}
	}

	opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
	opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
	opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
	opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr);

	return true;
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr);

	if (nvme_bdev_ctrlr_get(trid)) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
			    trid->traddr);
		return false;
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		bool claim_device = false;
		size_t i;

		for (i = 0; i < ctx->count; i++) {
			if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
				claim_device = true;
				break;
			}
		}

		if (!claim_device) {
			SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr);
			return false;
		}
	}

	if (ctx->hostnqn) {
		snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn);
	}

	opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
	opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
	opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
	opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;

	return true;
}

static void
spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = ctx;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_WARNLOG("Abort failed. Resetting controller.\n");
		nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
		assert(nvme_bdev_ctrlr != NULL);
		bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
	}
}
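
/*
 * I/O timeout handler. A controller in fatal status is always reset; otherwise
 * the configured action_on_timeout decides whether to abort the command, reset
 * the controller, or do nothing.
 */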
static void
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
	   struct spdk_nvme_qpair *qpair, uint16_t cid)
{
	int rc;
	union spdk_nvme_csts_register csts;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;

	SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);

	csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
	if (csts.bits.cfs) {
		SPDK_ERRLOG("Controller Fatal Status, reset required\n");
		nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
		assert(nvme_bdev_ctrlr != NULL);
		bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
		return;
	}

	switch (g_opts.action_on_timeout) {
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
		if (qpair) {
			rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
						       spdk_nvme_abort_cpl, ctrlr);
			if (rc == 0) {
				return;
			}

			SPDK_ERRLOG("Unable to send abort. Resetting.\n");
		}

	/* FALLTHROUGH */
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
		nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
		assert(nvme_bdev_ctrlr != NULL);
		bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
		break;
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n");
		break;
	default:
		SPDK_ERRLOG("An invalid timeout action value is found.\n");
		break;
	}
}

void
nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
{
	pthread_mutex_lock(&g_bdev_nvme_mutex);
	nvme_bdev_ctrlr->ref--;

	if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
		return;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

static void
nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns)
{
	struct nvme_bdev *bdev, *tmp;

	TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) {
		spdk_bdev_unregister(&bdev->disk, NULL, NULL);
	}

	ns->populated = false;

	nvme_ctrlr_depopulate_namespace_done(ns->ctrlr);
}

static void nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns,
		struct nvme_async_probe_ctx *ctx)
{
	g_populate_namespace_fn[ns->type](ctrlr, ns, ctx);
}

static void nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns)
{
	g_depopulate_namespace_fn[ns->type](ns);
}

void
nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx,
				   struct nvme_bdev_ns *ns, int rc)
{
	if (rc == 0) {
		ns->populated = true;
		pthread_mutex_lock(&g_bdev_nvme_mutex);
		ns->ctrlr->ref++;
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
	} else {
		memset(ns, 0, sizeof(*ns));
	}

	if (ctx) {
		ctx->populates_in_progress--;
		if (ctx->populates_in_progress == 0) {
			nvme_ctrlr_populate_namespaces_done(ctx);
		}
	}
}

static void
nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
			       struct nvme_async_probe_ctx *ctx)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
	struct nvme_bdev_ns *ns;
	struct spdk_nvme_ns *nvme_ns;
	struct nvme_bdev *bdev;
	uint32_t i;
	int rc;
	uint64_t num_sectors;
	bool ns_is_active;

	if (ctx) {
		/* Initialize this count to 1 to handle the populate functions
		 * calling nvme_ctrlr_populate_namespace_done() immediately.
		 */
		ctx->populates_in_progress = 1;
	}

	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
		uint32_t nsid = i + 1;

		ns = nvme_bdev_ctrlr->namespaces[i];
		ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid);

		if (ns->populated && ns_is_active && ns->type == NVME_BDEV_NS_STANDARD) {
			/* NS is still there but attributes may have changed */
			nvme_ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
			num_sectors = spdk_nvme_ns_get_num_sectors(nvme_ns);
			bdev = TAILQ_FIRST(&ns->bdevs);
			if (bdev->disk.blockcnt != num_sectors) {
				SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %lu, new size %lu\n",
					       nsid,
					       bdev->disk.name,
					       bdev->disk.blockcnt,
					       num_sectors);
				rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
				if (rc != 0) {
					SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
						    bdev->disk.name, rc);
				}
			}
		}

		if (!ns->populated && ns_is_active) {
			ns->id = nsid;
			ns->ctrlr = nvme_bdev_ctrlr;
			if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
				ns->type = NVME_BDEV_NS_OCSSD;
			} else {
				ns->type = NVME_BDEV_NS_STANDARD;
			}

			TAILQ_INIT(&ns->bdevs);

			if (ctx) {
				ctx->populates_in_progress++;
			}
			nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx);
		}

		if (ns->populated && !ns_is_active) {
			nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
		}
	}

	if (ctx) {
		/* Decrement this count now that the loop is over to account
		 * for the one we started with. If the count is then 0, we
		 * know any populate_namespace functions completed immediately,
		 * so we'll kick the callback here.
		 */
		ctx->populates_in_progress--;
		if (ctx->populates_in_progress == 0) {
			nvme_ctrlr_populate_namespaces_done(ctx);
		}
	}

}

static void
aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg;
	union spdk_nvme_async_event_completion event;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_WARNLOG("AER request execute failed");
		return;
	}

	event.raw = cpl->cdw0;
	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
		nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
	} else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR) &&
		   (event.bits.log_page_identifier == SPDK_OCSSD_LOG_CHUNK_NOTIFICATION) &&
		   spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
		bdev_ocssd_handle_chunk_notification(nvme_bdev_ctrlr);
	}
}

static int
create_ctrlr(struct spdk_nvme_ctrlr *ctrlr,
	     const char *name,
	     const struct spdk_nvme_transport_id *trid,
	     uint32_t prchk_flags)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	uint32_t i;
	int rc;

	nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr));
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return -ENOMEM;
	}
	nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
	nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *));
	if (!nvme_bdev_ctrlr->namespaces) {
		SPDK_ERRLOG("Failed to allocate block namespaces pointer\n");
		free(nvme_bdev_ctrlr);
		return -ENOMEM;
	}

	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
		nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns));
		if (nvme_bdev_ctrlr->namespaces[i] == NULL) {
			SPDK_ERRLOG("Failed to allocate block namespace struct\n");
			for (; i > 0; i--) {
				free(nvme_bdev_ctrlr->namespaces[i - 1]);
			}
			free(nvme_bdev_ctrlr->namespaces);
			free(nvme_bdev_ctrlr);
			return -ENOMEM;
		}
	}

	nvme_bdev_ctrlr->adminq_timer_poller = NULL;
	nvme_bdev_ctrlr->ctrlr = ctrlr;
	nvme_bdev_ctrlr->ref = 0;
	nvme_bdev_ctrlr->trid = *trid;
	nvme_bdev_ctrlr->name = strdup(name);
	if (nvme_bdev_ctrlr->name == NULL) {
		free(nvme_bdev_ctrlr->namespaces);
		free(nvme_bdev_ctrlr);
		return -ENOMEM;
	}

	if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
		rc = bdev_ocssd_init_ctrlr(nvme_bdev_ctrlr);
		if (spdk_unlikely(rc != 0)) {
			SPDK_ERRLOG("Unable to initialize OCSSD controller\n");
			free(nvme_bdev_ctrlr->name);
			free(nvme_bdev_ctrlr->namespaces);
			free(nvme_bdev_ctrlr);
			return rc;
		}
	}

	nvme_bdev_ctrlr->prchk_flags = prchk_flags;

	spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,
				sizeof(struct nvme_io_channel),
				name);

	nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, ctrlr,
					       g_opts.nvme_adminq_poll_period_us);

	TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);

	if (g_opts.timeout_us > 0) {
		spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
				timeout_cb, NULL);
	}

	spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr);

	if (spdk_nvme_ctrlr_get_flags(nvme_bdev_ctrlr->ctrlr) &
	    SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
		nvme_bdev_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_bdev_ctrlr->ctrlr);
		if (nvme_bdev_ctrlr->opal_dev == NULL) {
			SPDK_ERRLOG("Failed to initialize Opal\n");
		}
	}
	return 0;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_probe_ctx *ctx = cb_ctx;
	char *name = NULL;
	uint32_t prchk_flags = 0;
	size_t i;

	if (ctx) {
		for (i = 0; i < ctx->count; i++) {
			if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
				prchk_flags = ctx->prchk_flags[i];
				name = strdup(ctx->names[i]);
				break;
			}
		}
	} else {
		name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
	}
	if (!name) {
		SPDK_ERRLOG("Failed to assign name to NVMe device\n");
		return;
	}

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name);

	create_ctrlr(ctrlr, name, trid, prchk_flags);

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid);
	if (!nvme_bdev_ctrlr) {
		SPDK_ERRLOG("Failed to find new NVMe controller\n");
		free(name);
		return;
	}

	nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);

	free(name);
}
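
/*
 * Hot-remove callback, also reused by spdk_bdev_nvme_delete(): depopulate every
 * namespace, mark the controller for destruction, and destruct it once the
 * last reference is dropped.
 */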
static void
remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t i;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_bdev_ns *ns;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		if (nvme_bdev_ctrlr->ctrlr == ctrlr) {
			/* The controller's destruction was already started */
			if (nvme_bdev_ctrlr->destruct) {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
				return;
			}
			pthread_mutex_unlock(&g_bdev_nvme_mutex);
			for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
				uint32_t nsid = i + 1;

				ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
				if (ns->populated) {
					assert(ns->id == nsid);
					nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
				}
			}

			pthread_mutex_lock(&g_bdev_nvme_mutex);
			nvme_bdev_ctrlr->destruct = true;
			if (nvme_bdev_ctrlr->ref == 0) {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
				nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
			} else {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
			}
			return;
		}
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

static int
bdev_nvme_hotplug(void *arg)
{
	struct spdk_nvme_transport_id trid_pcie;
	int done;

	if (!g_hotplug_probe_ctx) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);

		g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
				      hotplug_probe_cb,
				      attach_cb, remove_cb);
		if (!g_hotplug_probe_ctx) {
			return -1;
		}
	}

	done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx);
	if (done != -EAGAIN) {
		g_hotplug_probe_ctx = NULL;
		return 1;
	}

	return -1;
}

void
spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
{
	*opts = g_opts;
}

int
spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
{
	if (g_bdev_nvme_init_thread != NULL) {
		if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
			return -EPERM;
		}
	}

	g_opts = *opts;

	return 0;
}

struct set_nvme_hotplug_ctx {
	uint64_t period_us;
	bool enabled;
	spdk_msg_fn fn;
	void *fn_ctx;
};

static void
set_nvme_hotplug_period_cb(void *_ctx)
{
	struct set_nvme_hotplug_ctx *ctx = _ctx;

	spdk_poller_unregister(&g_hotplug_poller);
	if (ctx->enabled) {
		g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
	}

	g_nvme_hotplug_poll_period_us = ctx->period_us;
	g_nvme_hotplug_enabled = ctx->enabled;
	if (ctx->fn) {
		ctx->fn(ctx->fn_ctx);
	}

	free(ctx);
}
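
/*
 * Enable or disable the PCIe hotplug poller. The change is applied on the
 * thread that initialized the module; the optional callback fires once the
 * new setting has taken effect.
 */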
int
spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
{
	struct set_nvme_hotplug_ctx *ctx;

	if (enabled == true && !spdk_process_is_primary()) {
		return -EPERM;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		return -ENOMEM;
	}

	period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
	ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
	ctx->enabled = enabled;
	ctx->fn = cb;
	ctx->fn_ctx = cb_ctx;

	spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
	return 0;
}

static void
populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc)
{
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->cb_ctx, count, rc);
	}

	free(ctx);
}

static void
nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_bdev_ns *ns;
	struct nvme_bdev *nvme_bdev, *tmp;
	uint32_t i, nsid;
	size_t j;

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid);

	/*
	 * Report the new bdevs that were created in this call.
	 * There can be more than one bdev per NVMe controller.
	 */
	j = 0;
	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
		nsid = i + 1;
		ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
		if (!ns->populated) {
			continue;
		}
		assert(ns->id == nsid);
		TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) {
			if (j < ctx->count) {
				ctx->names[j] = nvme_bdev->disk.name;
				j++;
			} else {
				SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
					    ctx->count);
				populate_namespaces_cb(ctx, 0, -ERANGE);
				return;
			}
		}
	}

	populate_namespaces_cb(ctx, j, 0);
}

static void
connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_async_probe_ctx *ctx;
	int rc;

	ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts);

	spdk_poller_unregister(&ctx->poller);

	rc = create_ctrlr(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags);
	if (rc) {
		SPDK_ERRLOG("Failed to create new device\n");
		populate_namespaces_cb(ctx, 0, rc);
		return;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid);
	assert(nvme_bdev_ctrlr != NULL);

	nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, ctx);
}

static int
bdev_nvme_async_poll(void *arg)
{
	struct nvme_async_probe_ctx *ctx = arg;
	int rc;

	rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
	if (spdk_unlikely(rc != -EAGAIN && rc != 0)) {
		spdk_poller_unregister(&ctx->poller);
		free(ctx);
	}

	return 1;
}
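
/*
 * Public entry point used by the RPC layer to attach a controller. The connect
 * is asynchronous: once the controller attaches, its active namespaces are
 * exposed as bdevs and cb_fn is invoked with the names of the created bdevs.
 */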
int
spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_host_id *hostid,
		      const char *base_name,
		      const char **names,
		      uint32_t count,
		      const char *hostnqn,
		      uint32_t prchk_flags,
		      spdk_bdev_create_nvme_fn cb_fn,
		      void *cb_ctx)
{
	struct nvme_probe_skip_entry *entry, *tmp;
	struct nvme_async_probe_ctx *ctx;

	if (nvme_bdev_ctrlr_get(trid) != NULL) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
		return -EEXIST;
	}

	if (nvme_bdev_ctrlr_get_by_name(base_name)) {
		SPDK_ERRLOG("A controller with the provided name (%s) already exists.\n", base_name);
		return -EEXIST;
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
			if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
				TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
				free(entry);
				break;
			}
		}
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}
	ctx->base_name = base_name;
	ctx->names = names;
	ctx->count = count;
	ctx->cb_fn = cb_fn;
	ctx->cb_ctx = cb_ctx;
	ctx->prchk_flags = prchk_flags;
	ctx->trid = *trid;

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts));
	ctx->opts.transport_retry_count = g_opts.retry_count;

	if (hostnqn) {
		snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn);
	}

	if (hostid->hostaddr[0] != '\0') {
		snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr);
	}

	if (hostid->hostsvcid[0] != '\0') {
		snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid);
	}

	ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb);
	if (ctx->probe_ctx == NULL) {
		SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
		free(ctx);
		return -ENODEV;
	}
	ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);

	return 0;
}

int
spdk_bdev_nvme_delete(const char *name)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
	struct nvme_probe_skip_entry *entry;

	if (name == NULL) {
		return -EINVAL;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to find NVMe controller\n");
		return -ENODEV;
	}

	if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		entry = calloc(1, sizeof(*entry));
		if (!entry) {
			return -ENOMEM;
		}
		entry->trid = nvme_bdev_ctrlr->trid;
		TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
	}

	remove_cb(NULL, nvme_bdev_ctrlr->ctrlr);
	return 0;
}
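
/*
 * Module init: parse the legacy "[Nvme]" config section (RetryCount,
 * TimeoutUsec, ActionOnTimeout, AdminPollRate, IOPollRate, HotplugEnable,
 * HotplugPollRate, HostNQN, DelayCmdSubmit and TransportID entries), connect
 * the configured controllers, and start the hotplug poller if requested.
 */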
g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; 1764 } else if (!strcasecmp(val, "Abort")) { 1765 g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; 1766 } 1767 } 1768 } 1769 1770 intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); 1771 if (intval > 0) { 1772 g_opts.nvme_adminq_poll_period_us = intval; 1773 } 1774 1775 intval = spdk_conf_section_get_intval(sp, "IOPollRate"); 1776 if (intval > 0) { 1777 g_opts.nvme_ioq_poll_period_us = intval; 1778 } 1779 1780 if (spdk_process_is_primary()) { 1781 hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false); 1782 } 1783 1784 hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate"); 1785 if (hotplug_period < 0) { 1786 hotplug_period = 0; 1787 } 1788 1789 g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN"); 1790 probe_ctx->hostnqn = g_nvme_hostnqn; 1791 1792 g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit", 1793 SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT); 1794 1795 for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { 1796 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); 1797 if (val == NULL) { 1798 break; 1799 } 1800 1801 rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); 1802 if (rc < 0) { 1803 SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); 1804 rc = -1; 1805 goto end; 1806 } 1807 1808 rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val); 1809 if (rc < 0) { 1810 SPDK_ERRLOG("Unable to parse HostID: %s\n", val); 1811 rc = -1; 1812 goto end; 1813 } 1814 1815 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); 1816 if (val == NULL) { 1817 SPDK_ERRLOG("No name provided for TransportID\n"); 1818 rc = -1; 1819 goto end; 1820 } 1821 1822 probe_ctx->names[i] = val; 1823 1824 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2); 1825 if (val != NULL) { 1826 rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val); 1827 if (rc < 0) { 1828 SPDK_ERRLOG("Unable to parse prchk: %s\n", val); 1829 rc = -1; 1830 goto end; 1831 } 1832 } 1833 1834 probe_ctx->count++; 1835 1836 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 1837 struct spdk_nvme_ctrlr *ctrlr; 1838 struct spdk_nvme_ctrlr_opts opts; 1839 1840 if (nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 1841 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", 1842 probe_ctx->trids[i].traddr); 1843 rc = -1; 1844 goto end; 1845 } 1846 1847 if (probe_ctx->trids[i].subnqn[0] == '\0') { 1848 SPDK_ERRLOG("Need to provide subsystem nqn\n"); 1849 rc = -1; 1850 goto end; 1851 } 1852 1853 spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); 1854 opts.transport_retry_count = g_opts.retry_count; 1855 1856 if (probe_ctx->hostnqn != NULL) { 1857 snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn); 1858 } 1859 1860 if (probe_ctx->hostids[i].hostaddr[0] != '\0') { 1861 snprintf(opts.src_addr, sizeof(opts.src_addr), "%s", probe_ctx->hostids[i].hostaddr); 1862 } 1863 1864 if (probe_ctx->hostids[i].hostsvcid[0] != '\0') { 1865 snprintf(opts.src_svcid, sizeof(opts.src_svcid), "%s", probe_ctx->hostids[i].hostsvcid); 1866 } 1867 1868 ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts)); 1869 if (ctrlr == NULL) { 1870 SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n", 1871 probe_ctx->trids[i].traddr); 1872 rc = -1; 1873 goto end; 1874 } 1875 1876 rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i], 0); 1877 if (rc) { 1878 goto end; 1879 } 1880 1881 nvme_bdev_ctrlr = 
nvme_bdev_ctrlr_get(&probe_ctx->trids[i]); 1882 if (!nvme_bdev_ctrlr) { 1883 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 1884 rc = -ENODEV; 1885 goto end; 1886 } 1887 1888 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1889 } else { 1890 local_nvme_num++; 1891 } 1892 } 1893 1894 if (local_nvme_num > 0) { 1895 /* used to probe local NVMe device */ 1896 if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) { 1897 rc = -1; 1898 goto end; 1899 } 1900 1901 for (i = 0; i < probe_ctx->count; i++) { 1902 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 1903 continue; 1904 } 1905 1906 if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 1907 SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); 1908 SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); 1909 } 1910 } 1911 } 1912 1913 rc = spdk_bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL); 1914 if (rc) { 1915 SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc)); 1916 rc = -1; 1917 } 1918 end: 1919 free(probe_ctx); 1920 return rc; 1921 } 1922 1923 static void 1924 bdev_nvme_library_fini(void) 1925 { 1926 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp; 1927 struct nvme_probe_skip_entry *entry, *entry_tmp; 1928 struct nvme_bdev_ns *ns; 1929 uint32_t i; 1930 1931 spdk_poller_unregister(&g_hotplug_poller); 1932 free(g_hotplug_probe_ctx); 1933 1934 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) { 1935 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 1936 free(entry); 1937 } 1938 1939 pthread_mutex_lock(&g_bdev_nvme_mutex); 1940 TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) { 1941 if (nvme_bdev_ctrlr->destruct) { 1942 /* This controller's destruction was already started 1943 * before the application started shutting down 1944 */ 1945 continue; 1946 } 1947 1948 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1949 1950 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1951 uint32_t nsid = i + 1; 1952 1953 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1954 if (ns->populated) { 1955 assert(ns->id == nsid); 1956 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1957 } 1958 } 1959 1960 pthread_mutex_lock(&g_bdev_nvme_mutex); 1961 nvme_bdev_ctrlr->destruct = true; 1962 1963 if (nvme_bdev_ctrlr->ref == 0) { 1964 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1965 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1966 pthread_mutex_lock(&g_bdev_nvme_mutex); 1967 } 1968 } 1969 1970 g_bdev_nvme_module_finish = true; 1971 if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 1972 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1973 spdk_bdev_module_finish_done(); 1974 return; 1975 } 1976 1977 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1978 } 1979 1980 static void 1981 bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io) 1982 { 1983 struct spdk_bdev *bdev = bdev_io->bdev; 1984 struct spdk_dif_ctx dif_ctx; 1985 struct spdk_dif_error err_blk = {}; 1986 int rc; 1987 1988 rc = spdk_dif_ctx_init(&dif_ctx, 1989 bdev->blocklen, bdev->md_len, bdev->md_interleave, 1990 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 1991 bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0); 1992 if (rc != 0) { 1993 SPDK_ERRLOG("Initialization of DIF context failed\n"); 1994 return; 1995 } 1996 1997 if (bdev->md_interleave) { 1998 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 1999 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2000 } else { 2001 struct iovec md_iov = { 2002 .iov_base = bdev_io->u.bdev.md_buf, 2003 .iov_len = 
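/* Separated-metadata (DIX) case: the whole metadata buffer is described by one
 * iovec of num_blocks * md_len bytes, and spdk_dix_verify() walks the data
 * iovecs and this metadata iovec side by side. */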
bdev_io->u.bdev.num_blocks * bdev->md_len, 2004 }; 2005 2006 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 2007 &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2008 } 2009 2010 if (rc != 0) { 2011 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2012 err_blk.err_type, err_blk.err_offset); 2013 } else { 2014 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n"); 2015 } 2016 } 2017 2018 static void 2019 bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2020 { 2021 struct nvme_bdev_io *bio = ref; 2022 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2023 2024 if (spdk_nvme_cpl_is_success(cpl)) { 2025 /* Run PI verification for read data buffer. */ 2026 bdev_nvme_verify_pi_error(bdev_io); 2027 } 2028 2029 /* Return original completion status */ 2030 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, 2031 bio->cpl.status.sc); 2032 } 2033 2034 static void 2035 bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2036 { 2037 struct nvme_bdev_io *bio = ref; 2038 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2039 int ret; 2040 2041 if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) { 2042 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n", 2043 cpl->status.sct, cpl->status.sc); 2044 2045 /* Save completion status to use after verifying PI error. */ 2046 bio->cpl = *cpl; 2047 2048 /* Read without PI checking to verify PI error. */ 2049 ret = bdev_nvme_no_pi_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, 2050 spdk_bdev_io_get_io_channel(bdev_io), 2051 bio, 2052 bdev_io->u.bdev.iovs, 2053 bdev_io->u.bdev.iovcnt, 2054 bdev_io->u.bdev.md_buf, 2055 bdev_io->u.bdev.num_blocks, 2056 bdev_io->u.bdev.offset_blocks); 2057 if (ret == 0) { 2058 return; 2059 } 2060 } 2061 2062 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2063 } 2064 2065 static void 2066 bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2067 { 2068 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2069 2070 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2071 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n", 2072 cpl->status.sct, cpl->status.sc); 2073 /* Run PI verification for write data buffer if PI error is detected. */ 2074 bdev_nvme_verify_pi_error(bdev_io); 2075 } 2076 2077 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2078 } 2079 2080 static void 2081 bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2082 { 2083 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2084 2085 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2086 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n", 2087 cpl->status.sct, cpl->status.sc); 2088 /* Run PI verification for compare data buffer if PI error is detected. 
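 * The verification below is diagnostic only: it logs where the mismatch was
 * found (or that none could be found), and the original completion status is
 * still returned to the bdev layer unchanged.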
*/ 2089 bdev_nvme_verify_pi_error(bdev_io); 2090 } 2091 2092 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2093 } 2094 2095 static void 2096 bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2097 { 2098 struct nvme_bdev_io *bio = ref; 2099 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2100 2101 /* Compare operation completion */ 2102 if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) { 2103 /* Save compare result for write callback */ 2104 bio->cpl = *cpl; 2105 return; 2106 } 2107 2108 /* Write operation completion */ 2109 if (spdk_nvme_cpl_is_error(&bio->cpl)) { 2110 /* If bio->cpl is already an error, it means the compare operation failed. In that case, 2111 * complete the IO with the compare operation's status. 2112 */ 2113 if (!spdk_nvme_cpl_is_error(cpl)) { 2114 SPDK_ERRLOG("Unexpected write success after compare failure.\n"); 2115 } 2116 2117 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2118 } else { 2119 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2120 } 2121 } 2122 2123 static void 2124 bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) 2125 { 2126 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2127 2128 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2129 } 2130 2131 static void 2132 bdev_nvme_admin_passthru_completion(void *ctx) 2133 { 2134 struct nvme_bdev_io *bio = ctx; 2135 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2136 2137 spdk_bdev_io_complete_nvme_status(bdev_io, 2138 bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2139 } 2140 2141 static void 2142 bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) 2143 { 2144 struct nvme_bdev_io *bio = ref; 2145 2146 bio->cpl = *cpl; 2147 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); 2148 } 2149 2150 static void 2151 bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) 2152 { 2153 struct nvme_bdev_io *bio = ref; 2154 struct iovec *iov; 2155 2156 bio->iov_offset = sgl_offset; 2157 for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { 2158 iov = &bio->iovs[bio->iovpos]; 2159 if (bio->iov_offset < iov->iov_len) { 2160 break; 2161 } 2162 2163 bio->iov_offset -= iov->iov_len; 2164 } 2165 } 2166 2167 static int 2168 bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) 2169 { 2170 struct nvme_bdev_io *bio = ref; 2171 struct iovec *iov; 2172 2173 assert(bio->iovpos < bio->iovcnt); 2174 2175 iov = &bio->iovs[bio->iovpos]; 2176 2177 *address = iov->iov_base; 2178 *length = iov->iov_len; 2179 2180 if (bio->iov_offset) { 2181 assert(bio->iov_offset <= iov->iov_len); 2182 *address += bio->iov_offset; 2183 *length -= bio->iov_offset; 2184 } 2185 2186 bio->iov_offset += *length; 2187 if (bio->iov_offset == iov->iov_len) { 2188 bio->iovpos++; 2189 bio->iov_offset = 0; 2190 } 2191 2192 return 0; 2193 } 2194 2195 static void 2196 bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset) 2197 { 2198 struct nvme_bdev_io *bio = ref; 2199 struct iovec *iov; 2200 2201 bio->fused_iov_offset = sgl_offset; 2202 for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) { 2203 iov = &bio->fused_iovs[bio->fused_iovpos]; 2204 if (bio->fused_iov_offset < iov->iov_len) { 2205 break; 2206 } 2207 2208 bio->fused_iov_offset -= iov->iov_len; 2209 } 2210 
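	/* At this point fused_iovpos/fused_iov_offset address the iovec that
	 * contains byte sgl_offset of the fused payload; the next_sge callback
	 * below resumes from that position, one contiguous segment per call. */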
} 2211 2212 static int 2213 bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length) 2214 { 2215 struct nvme_bdev_io *bio = ref; 2216 struct iovec *iov; 2217 2218 assert(bio->fused_iovpos < bio->fused_iovcnt); 2219 2220 iov = &bio->fused_iovs[bio->fused_iovpos]; 2221 2222 *address = iov->iov_base; 2223 *length = iov->iov_len; 2224 2225 if (bio->fused_iov_offset) { 2226 assert(bio->fused_iov_offset <= iov->iov_len); 2227 *address += bio->fused_iov_offset; 2228 *length -= bio->fused_iov_offset; 2229 } 2230 2231 bio->fused_iov_offset += *length; 2232 if (bio->fused_iov_offset == iov->iov_len) { 2233 bio->fused_iovpos++; 2234 bio->fused_iov_offset = 0; 2235 } 2236 2237 return 0; 2238 } 2239 2240 static int 2241 bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2242 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2243 void *md, uint64_t lba_count, uint64_t lba) 2244 { 2245 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2246 int rc; 2247 2248 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx without PI check\n", 2249 lba_count, lba); 2250 2251 bio->iovs = iov; 2252 bio->iovcnt = iovcnt; 2253 bio->iovpos = 0; 2254 bio->iov_offset = 0; 2255 2256 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2257 bdev_nvme_no_pi_readv_done, bio, 0, 2258 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2259 md, 0, 0); 2260 2261 if (rc != 0 && rc != -ENOMEM) { 2262 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc); 2263 } 2264 return rc; 2265 } 2266 2267 static int 2268 bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2269 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2270 void *md, uint64_t lba_count, uint64_t lba) 2271 { 2272 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2273 int rc; 2274 2275 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n", 2276 lba_count, lba); 2277 2278 bio->iovs = iov; 2279 bio->iovcnt = iovcnt; 2280 bio->iovpos = 0; 2281 bio->iov_offset = 0; 2282 2283 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2284 bdev_nvme_readv_done, bio, nbdev->disk.dif_check_flags, 2285 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2286 md, 0, 0); 2287 2288 if (rc != 0 && rc != -ENOMEM) { 2289 SPDK_ERRLOG("readv failed: rc = %d\n", rc); 2290 } 2291 return rc; 2292 } 2293 2294 static int 2295 bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2296 struct nvme_bdev_io *bio, 2297 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2298 { 2299 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2300 int rc; 2301 2302 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n", 2303 lba_count, lba); 2304 2305 bio->iovs = iov; 2306 bio->iovcnt = iovcnt; 2307 bio->iovpos = 0; 2308 bio->iov_offset = 0; 2309 2310 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2311 bdev_nvme_writev_done, bio, nbdev->disk.dif_check_flags, 2312 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2313 md, 0, 0); 2314 2315 if (rc != 0 && rc != -ENOMEM) { 2316 SPDK_ERRLOG("writev failed: rc = %d\n", rc); 2317 } 2318 return rc; 2319 } 2320 2321 static int 2322 bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2323 struct nvme_bdev_io *bio, 2324 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2325 { 2326 struct nvme_io_channel *nvme_ch = 
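/* NVMe Compare reads the addressed LBAs on the device and compares them with
 * the host buffer in iov; a miscompare is reported through the command's
 * completion status rather than as a submission error. */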
spdk_io_channel_get_ctx(ch); 2327 int rc; 2328 2329 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare %lu blocks with offset %#lx\n", 2330 lba_count, lba); 2331 2332 bio->iovs = iov; 2333 bio->iovcnt = iovcnt; 2334 bio->iovpos = 0; 2335 bio->iov_offset = 0; 2336 2337 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2338 bdev_nvme_comparev_done, bio, nbdev->disk.dif_check_flags, 2339 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2340 md, 0, 0); 2341 2342 if (rc != 0 && rc != -ENOMEM) { 2343 SPDK_ERRLOG("comparev failed: rc = %d\n", rc); 2344 } 2345 return rc; 2346 } 2347 2348 static int 2349 bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2350 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 2351 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2352 { 2353 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2354 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2355 uint32_t flags = nbdev->disk.dif_check_flags; 2356 int rc; 2357 2358 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare and write %lu blocks with offset %#lx\n", 2359 lba_count, lba); 2360 2361 bio->iovs = cmp_iov; 2362 bio->iovcnt = cmp_iovcnt; 2363 bio->iovpos = 0; 2364 bio->iov_offset = 0; 2365 bio->fused_iovs = write_iov; 2366 bio->fused_iovcnt = write_iovcnt; 2367 bio->fused_iovpos = 0; 2368 bio->fused_iov_offset = 0; 2369 2370 if (bdev_io->num_retries == 0) { 2371 bio->first_fused_submitted = false; 2372 } 2373 2374 if (!bio->first_fused_submitted) { 2375 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2376 memset(&bio->cpl, 0, sizeof(bio->cpl)); 2377 2378 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2379 bdev_nvme_comparev_and_writev_done, bio, flags, 2380 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0); 2381 if (rc == 0) { 2382 bio->first_fused_submitted = true; 2383 flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2384 } else { 2385 if (rc != -ENOMEM) { 2386 SPDK_ERRLOG("compare failed: rc = %d\n", rc); 2387 } 2388 return rc; 2389 } 2390 } 2391 2392 flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND; 2393 2394 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2395 bdev_nvme_comparev_and_writev_done, bio, flags, 2396 bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0); 2397 if (rc != 0 && rc != -ENOMEM) { 2398 SPDK_ERRLOG("write failed: rc = %d\n", rc); 2399 rc = 0; 2400 } 2401 2402 return rc; 2403 } 2404 2405 static int 2406 bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2407 struct nvme_bdev_io *bio, 2408 uint64_t offset_blocks, 2409 uint64_t num_blocks) 2410 { 2411 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2412 struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; 2413 struct spdk_nvme_dsm_range *range; 2414 uint64_t offset, remaining; 2415 uint64_t num_ranges_u64; 2416 uint16_t num_ranges; 2417 int rc; 2418 2419 num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / 2420 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2421 if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { 2422 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); 2423 return -EINVAL; 2424 } 2425 num_ranges = (uint16_t)num_ranges_u64; 2426 2427 offset = offset_blocks; 2428 remaining = num_blocks; 2429 range = &dsm_ranges[0]; 2430 2431 /* Fill max-size ranges until the remaining blocks fit 
into one range */ 2432 while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { 2433 range->attributes.raw = 0; 2434 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2435 range->starting_lba = offset; 2436 2437 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2438 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2439 range++; 2440 } 2441 2442 /* Final range describes the remaining blocks */ 2443 range->attributes.raw = 0; 2444 range->length = remaining; 2445 range->starting_lba = offset; 2446 2447 rc = spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, 2448 SPDK_NVME_DSM_ATTR_DEALLOCATE, 2449 dsm_ranges, num_ranges, 2450 bdev_nvme_queued_done, bio); 2451 2452 return rc; 2453 } 2454 2455 static int 2456 bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2457 struct nvme_bdev_io *bio, 2458 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2459 { 2460 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2461 2462 if (nbytes > max_xfer_size) { 2463 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2464 return -EINVAL; 2465 } 2466 2467 bio->orig_thread = spdk_io_channel_get_thread(ch); 2468 2469 return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_bdev_ctrlr->ctrlr, cmd, buf, 2470 (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); 2471 } 2472 2473 static int 2474 bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2475 struct nvme_bdev_io *bio, 2476 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2477 { 2478 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2479 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2480 2481 if (nbytes > max_xfer_size) { 2482 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2483 return -EINVAL; 2484 } 2485 2486 /* 2487 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2488 * so fill it out automatically. 2489 */ 2490 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2491 2492 return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2493 (uint32_t)nbytes, bdev_nvme_queued_done, bio); 2494 } 2495 2496 static int 2497 bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2498 struct nvme_bdev_io *bio, 2499 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len) 2500 { 2501 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2502 size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->nvme_ns->ns); 2503 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2504 2505 if (nbytes > max_xfer_size) { 2506 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2507 return -EINVAL; 2508 } 2509 2510 if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns)) { 2511 SPDK_ERRLOG("invalid meta data buffer size\n"); 2512 return -EINVAL; 2513 } 2514 2515 /* 2516 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2517 * so fill it out automatically. 
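	 * Note that any nsid the caller placed in the passthru command is
	 * overwritten with the id of the namespace backing this bdev.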
2518 */ 2519 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2520 2521 return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2522 (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio); 2523 } 2524 2525 static void 2526 bdev_nvme_get_spdk_running_config(FILE *fp) 2527 { 2528 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2529 2530 fprintf(fp, "\n[Nvme]"); 2531 fprintf(fp, "\n" 2532 "# NVMe Device Whitelist\n" 2533 "# Users may specify which NVMe devices to claim by their transport id.\n" 2534 "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n" 2535 "# The second argument is the assigned name, which can be referenced from\n" 2536 "# other sections in the configuration file. For NVMe devices, a namespace\n" 2537 "# is automatically appended to each name in the format <YourName>nY, where\n" 2538 "# Y is the NSID (starts at 1).\n"); 2539 2540 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2541 const char *trtype; 2542 const char *prchk_flags; 2543 2544 trtype = spdk_nvme_transport_id_trtype_str(nvme_bdev_ctrlr->trid.trtype); 2545 if (!trtype) { 2546 continue; 2547 } 2548 2549 if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 2550 fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n", 2551 trtype, 2552 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->name); 2553 } else { 2554 const char *adrfam; 2555 2556 adrfam = spdk_nvme_transport_id_adrfam_str(nvme_bdev_ctrlr->trid.adrfam); 2557 prchk_flags = spdk_nvme_prchk_flags_str(nvme_bdev_ctrlr->prchk_flags); 2558 2559 if (adrfam) { 2560 fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2561 trtype, adrfam, 2562 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid, 2563 nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name); 2564 } else { 2565 fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2566 trtype, 2567 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid, 2568 nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name); 2569 } 2570 2571 if (prchk_flags) { 2572 fprintf(fp, " \"%s\"\n", prchk_flags); 2573 } else { 2574 fprintf(fp, "\n"); 2575 } 2576 } 2577 } 2578 2579 fprintf(fp, "\n" 2580 "# The number of attempts per I/O when an I/O fails. Do not include\n" 2581 "# this key to get the default behavior.\n"); 2582 fprintf(fp, "RetryCount %d\n", g_opts.retry_count); 2583 fprintf(fp, "\n" 2584 "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n"); 2585 fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us); 2586 2587 fprintf(fp, "\n" 2588 "# Action to take on command time out. Only valid when Timeout is greater\n" 2589 "# than 0. 
This may be 'Reset' to reset the controller, 'Abort' to abort\n" 2590 "# the command, or 'None' to just print a message but do nothing.\n" 2591 "# Admin command timeouts will always result in a reset.\n"); 2592 switch (g_opts.action_on_timeout) { 2593 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 2594 fprintf(fp, "ActionOnTimeout None\n"); 2595 break; 2596 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 2597 fprintf(fp, "ActionOnTimeout Reset\n"); 2598 break; 2599 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 2600 fprintf(fp, "ActionOnTimeout Abort\n"); 2601 break; 2602 } 2603 2604 fprintf(fp, "\n" 2605 "# Set how often the admin queue is polled for asynchronous events.\n" 2606 "# Units in microseconds.\n"); 2607 fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us); 2608 fprintf(fp, "IOPollRate %" PRIu64"\n", g_opts.nvme_ioq_poll_period_us); 2609 fprintf(fp, "\n" 2610 "# Disable handling of hotplug (runtime insert and remove) events,\n" 2611 "# users can set to Yes if want to enable it.\n" 2612 "# Default: No\n"); 2613 fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No"); 2614 fprintf(fp, "\n" 2615 "# Set how often the hotplug is processed for insert and remove events." 2616 "# Units in microseconds.\n"); 2617 fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us); 2618 if (g_nvme_hostnqn) { 2619 fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn); 2620 } 2621 fprintf(fp, "DelayCmdSubmit %s\n", g_opts.delay_cmd_submit ? "True" : "False"); 2622 2623 fprintf(fp, "\n"); 2624 } 2625 2626 static void 2627 nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2628 { 2629 /* nop */ 2630 } 2631 2632 static void 2633 nvme_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2634 { 2635 g_config_json_namespace_fn[ns->type](w, ns); 2636 } 2637 2638 static int 2639 bdev_nvme_config_json(struct spdk_json_write_ctx *w) 2640 { 2641 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2642 struct spdk_nvme_transport_id *trid; 2643 const char *action; 2644 uint32_t nsid; 2645 2646 if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) { 2647 action = "reset"; 2648 } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) { 2649 action = "abort"; 2650 } else { 2651 action = "none"; 2652 } 2653 2654 spdk_json_write_object_begin(w); 2655 2656 spdk_json_write_named_string(w, "method", "bdev_nvme_set_options"); 2657 2658 spdk_json_write_named_object_begin(w, "params"); 2659 spdk_json_write_named_string(w, "action_on_timeout", action); 2660 spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us); 2661 spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count); 2662 spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst); 2663 spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight); 2664 spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight); 2665 spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight); 2666 spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us); 2667 spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us); 2668 spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests); 2669 spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit); 2670 spdk_json_write_object_end(w); 2671 2672 spdk_json_write_object_end(w); 2673 2674 
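	/* With the module defaults this first object comes out roughly as follows
	 * (illustrative, not verbatim JSON):
	 *
	 *   { "method": "bdev_nvme_set_options",
	 *     "params": { "action_on_timeout": "none", "timeout_us": 0,
	 *                 "retry_count": 4, "nvme_adminq_poll_period_us": 10000,
	 *                 "delay_cmd_submit": true, ... } }
	 *
	 * It is followed below by one bdev_nvme_attach_controller object per
	 * controller and, last, a bdev_nvme_set_hotplug object. */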
pthread_mutex_lock(&g_bdev_nvme_mutex); 2675 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2676 trid = &nvme_bdev_ctrlr->trid; 2677 2678 spdk_json_write_object_begin(w); 2679 2680 spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller"); 2681 2682 spdk_json_write_named_object_begin(w, "params"); 2683 spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name); 2684 nvme_bdev_dump_trid_json(trid, w); 2685 spdk_json_write_named_bool(w, "prchk_reftag", 2686 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0); 2687 spdk_json_write_named_bool(w, "prchk_guard", 2688 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0); 2689 2690 spdk_json_write_object_end(w); 2691 2692 spdk_json_write_object_end(w); 2693 2694 for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) { 2695 if (!nvme_bdev_ctrlr->namespaces[nsid]->populated) { 2696 continue; 2697 } 2698 2699 nvme_namespace_config_json(w, nvme_bdev_ctrlr->namespaces[nsid]); 2700 } 2701 } 2702 2703 /* Dump as last parameter to give all NVMe bdevs chance to be constructed 2704 * before enabling hotplug poller. 2705 */ 2706 spdk_json_write_object_begin(w); 2707 spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug"); 2708 2709 spdk_json_write_named_object_begin(w, "params"); 2710 spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us); 2711 spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled); 2712 spdk_json_write_object_end(w); 2713 2714 spdk_json_write_object_end(w); 2715 2716 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2717 return 0; 2718 } 2719 2720 struct spdk_nvme_ctrlr * 2721 spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) 2722 { 2723 if (!bdev || bdev->module != &nvme_if) { 2724 return NULL; 2725 } 2726 2727 return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_bdev_ctrlr->ctrlr; 2728 } 2729 2730 SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME) 2731
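/* Usage sketch (illustrative, not part of this module): a caller holding an
 * spdk_bdev pointer can recover the underlying controller, e.g.
 *
 *   struct spdk_nvme_ctrlr *ctrlr = spdk_bdev_nvme_get_ctrlr(bdev);
 *   if (ctrlr == NULL) {
 *           // bdev is owned by some other bdev module
 *   }
 *
 * spdk_bdev_nvme_get_ctrlr() returns NULL for bdevs not created by this
 * module. */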