1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_nvme.h" 37 #include "bdev_ocssd.h" 38 39 #include "spdk/config.h" 40 #include "spdk/endian.h" 41 #include "spdk/bdev.h" 42 #include "spdk/json.h" 43 #include "spdk/nvme.h" 44 #include "spdk/nvme_ocssd.h" 45 #include "spdk/thread.h" 46 #include "spdk/string.h" 47 #include "spdk/likely.h" 48 #include "spdk/util.h" 49 50 #include "spdk/bdev_module.h" 51 #include "spdk/log.h" 52 53 #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true 54 55 static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); 56 57 struct nvme_bdev_io { 58 /** array of iovecs to transfer. */ 59 struct iovec *iovs; 60 61 /** Number of iovecs in iovs array. */ 62 int iovcnt; 63 64 /** Current iovec position. */ 65 int iovpos; 66 67 /** Offset in current iovec. */ 68 uint32_t iov_offset; 69 70 /** array of iovecs to transfer. */ 71 struct iovec *fused_iovs; 72 73 /** Number of iovecs in iovs array. */ 74 int fused_iovcnt; 75 76 /** Current iovec position. */ 77 int fused_iovpos; 78 79 /** Offset in current iovec. 
*/ 80 uint32_t fused_iov_offset; 81 82 /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */ 83 struct spdk_nvme_cpl cpl; 84 85 /** Originating thread */ 86 struct spdk_thread *orig_thread; 87 88 /** Keeps track if first of fused commands was submitted */ 89 bool first_fused_submitted; 90 }; 91 92 struct nvme_probe_ctx { 93 size_t count; 94 struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; 95 struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS]; 96 const char *names[NVME_MAX_CONTROLLERS]; 97 uint32_t prchk_flags[NVME_MAX_CONTROLLERS]; 98 const char *hostnqn; 99 }; 100 101 struct nvme_probe_skip_entry { 102 struct spdk_nvme_transport_id trid; 103 TAILQ_ENTRY(nvme_probe_skip_entry) tailq; 104 }; 105 /* All the controllers deleted by users via RPC are skipped by hotplug monitor */ 106 static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER( 107 g_skipped_nvme_ctrlrs); 108 109 static struct spdk_bdev_nvme_opts g_opts = { 110 .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE, 111 .timeout_us = 0, 112 .retry_count = 4, 113 .arbitration_burst = 0, 114 .low_priority_weight = 0, 115 .medium_priority_weight = 0, 116 .high_priority_weight = 0, 117 .nvme_adminq_poll_period_us = 10000ULL, 118 .nvme_ioq_poll_period_us = 0, 119 .io_queue_requests = 0, 120 .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT, 121 }; 122 123 #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL 124 #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL 125 126 static int g_hot_insert_nvme_controller_index = 0; 127 static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT; 128 static bool g_nvme_hotplug_enabled = false; 129 static struct spdk_thread *g_bdev_nvme_init_thread; 130 static struct spdk_poller *g_hotplug_poller; 131 static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; 132 133 static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 134 struct nvme_async_probe_ctx *ctx); 135 static void nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx); 136 static int bdev_nvme_library_init(void); 137 static void bdev_nvme_library_fini(void); 138 static int bdev_nvme_readv(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 139 struct nvme_bdev_io *bio, 140 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba, 141 uint32_t flags); 142 static int bdev_nvme_no_pi_readv(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 143 struct nvme_bdev_io *bio, 144 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 145 static int bdev_nvme_writev(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 146 struct nvme_bdev_io *bio, 147 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba, 148 uint32_t flags); 149 static int bdev_nvme_comparev(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 150 struct nvme_bdev_io *bio, 151 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba, 152 uint32_t flags); 153 static int bdev_nvme_comparev_and_writev(struct nvme_bdev_ns *nvme_ns, 154 struct nvme_io_channel *nvme_ch, 155 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 156 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba, 157 uint32_t flags); 158 static int bdev_nvme_admin_passthru(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 159 struct nvme_bdev_io *bio, 160 struct 
spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 161 static int bdev_nvme_io_passthru(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 162 struct nvme_bdev_io *bio, 163 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 164 static int bdev_nvme_io_passthru_md(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 165 struct nvme_bdev_io *bio, 166 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len); 167 static int bdev_nvme_abort(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 168 struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort); 169 static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio, 170 bool failover); 171 172 typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 173 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 174 static void nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 175 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 176 177 static populate_namespace_fn g_populate_namespace_fn[] = { 178 NULL, 179 nvme_ctrlr_populate_standard_namespace, 180 bdev_ocssd_populate_namespace, 181 }; 182 183 typedef void (*depopulate_namespace_fn)(struct nvme_bdev_ns *ns); 184 static void nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns); 185 186 static depopulate_namespace_fn g_depopulate_namespace_fn[] = { 187 NULL, 188 nvme_ctrlr_depopulate_standard_namespace, 189 bdev_ocssd_depopulate_namespace, 190 }; 191 192 typedef void (*config_json_namespace_fn)(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns); 193 static void nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, 194 struct nvme_bdev_ns *ns); 195 196 static config_json_namespace_fn g_config_json_namespace_fn[] = { 197 NULL, 198 nvme_ctrlr_config_json_standard_namespace, 199 bdev_ocssd_namespace_config_json, 200 }; 201 202 struct spdk_nvme_qpair * 203 bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) 204 { 205 struct nvme_io_channel *nvme_ch; 206 207 nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch); 208 209 return nvme_ch->qpair; 210 } 211 212 static int 213 bdev_nvme_get_ctx_size(void) 214 { 215 return sizeof(struct nvme_bdev_io); 216 } 217 218 static struct spdk_bdev_module nvme_if = { 219 .name = "nvme", 220 .async_fini = true, 221 .module_init = bdev_nvme_library_init, 222 .module_fini = bdev_nvme_library_fini, 223 .config_json = bdev_nvme_config_json, 224 .get_ctx_size = bdev_nvme_get_ctx_size, 225 226 }; 227 SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if) 228 229 static void 230 bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 231 { 232 SPDK_DEBUGLOG(bdev_nvme, "qpar %p is disconnected, attempting reconnect.\n", qpair); 233 /* 234 * Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will 235 * reconnect a qpair and we will stop getting a callback for this one. 
236 */ 237 spdk_nvme_ctrlr_reconnect_io_qpair(qpair); 238 } 239 240 static int 241 bdev_nvme_poll(void *arg) 242 { 243 struct nvme_bdev_poll_group *group = arg; 244 int64_t num_completions; 245 246 if (group->collect_spin_stat && group->start_ticks == 0) { 247 group->start_ticks = spdk_get_ticks(); 248 } 249 250 num_completions = spdk_nvme_poll_group_process_completions(group->group, 0, 251 bdev_nvme_disconnected_qpair_cb); 252 if (group->collect_spin_stat) { 253 if (num_completions > 0) { 254 if (group->end_ticks != 0) { 255 group->spin_ticks += (group->end_ticks - group->start_ticks); 256 group->end_ticks = 0; 257 } 258 group->start_ticks = 0; 259 } else { 260 group->end_ticks = spdk_get_ticks(); 261 } 262 } 263 264 return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 265 } 266 267 static int 268 bdev_nvme_poll_adminq(void *arg) 269 { 270 int32_t rc; 271 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg; 272 273 assert(nvme_bdev_ctrlr != NULL); 274 275 rc = spdk_nvme_ctrlr_process_admin_completions(nvme_bdev_ctrlr->ctrlr); 276 if (rc < 0) { 277 bdev_nvme_reset(nvme_bdev_ctrlr, NULL, true); 278 } 279 280 return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; 281 } 282 283 static int 284 bdev_nvme_destruct(void *ctx) 285 { 286 struct nvme_bdev *nvme_disk = ctx; 287 288 nvme_bdev_detach_bdev_from_ns(nvme_disk); 289 290 free(nvme_disk->disk.name); 291 free(nvme_disk); 292 293 return 0; 294 } 295 296 static int 297 bdev_nvme_flush(struct nvme_bdev_ns *nvme_ns, struct nvme_bdev_io *bio, 298 uint64_t offset, uint64_t nbytes) 299 { 300 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); 301 302 return 0; 303 } 304 305 static void 306 _bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i) 307 { 308 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 309 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 310 struct spdk_bdev_io *bdev_io; 311 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; 312 313 /* A NULL ctx means success. */ 314 if (spdk_io_channel_iter_get_ctx(i) != NULL) { 315 status = SPDK_BDEV_IO_STATUS_FAILED; 316 } 317 318 while (!TAILQ_EMPTY(&nvme_ch->pending_resets)) { 319 bdev_io = TAILQ_FIRST(&nvme_ch->pending_resets); 320 TAILQ_REMOVE(&nvme_ch->pending_resets, bdev_io, module_link); 321 spdk_bdev_io_complete(bdev_io, status); 322 } 323 324 spdk_for_each_channel_continue(i, 0); 325 } 326 327 static void 328 _bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc) 329 { 330 /* we are using the for_each_channel cb_arg like a return code here. */ 331 /* If it's zero, we succeeded, otherwise, the reset failed. */ 332 void *cb_arg = NULL; 333 334 if (rc) { 335 cb_arg = (void *)0x1; 336 SPDK_ERRLOG("Resetting controller failed.\n"); 337 } else { 338 SPDK_NOTICELOG("Resetting controller successful.\n"); 339 } 340 341 pthread_mutex_lock(&g_bdev_nvme_mutex); 342 nvme_bdev_ctrlr->resetting = false; 343 nvme_bdev_ctrlr->failover_in_progress = false; 344 pthread_mutex_unlock(&g_bdev_nvme_mutex); 345 /* Make sure we clear any pending resets before returning. 
*/ 346 spdk_for_each_channel(nvme_bdev_ctrlr, 347 _bdev_nvme_complete_pending_resets, 348 cb_arg, NULL); 349 } 350 351 static void 352 _bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status) 353 { 354 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 355 void *ctx = spdk_io_channel_iter_get_ctx(i); 356 int rc = SPDK_BDEV_IO_STATUS_SUCCESS; 357 358 if (status) { 359 rc = SPDK_BDEV_IO_STATUS_FAILED; 360 } 361 if (ctx) { 362 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc); 363 } 364 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 365 } 366 367 static void 368 _bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i) 369 { 370 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 371 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 372 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 373 struct spdk_nvme_io_qpair_opts opts; 374 375 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 376 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 377 opts.create_only = true; 378 379 nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 380 if (!nvme_ch->qpair) { 381 spdk_for_each_channel_continue(i, -1); 382 return; 383 } 384 385 assert(nvme_ch->group != NULL); 386 if (spdk_nvme_poll_group_add(nvme_ch->group->group, nvme_ch->qpair) != 0) { 387 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n"); 388 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 389 spdk_for_each_channel_continue(i, -1); 390 return; 391 } 392 393 if (spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair)) { 394 SPDK_ERRLOG("Unable to connect I/O qpair.\n"); 395 spdk_nvme_poll_group_remove(nvme_ch->group->group, nvme_ch->qpair); 396 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 397 spdk_for_each_channel_continue(i, -1); 398 return; 399 } 400 401 spdk_for_each_channel_continue(i, 0); 402 } 403 404 static void 405 _bdev_nvme_reset(struct spdk_io_channel_iter *i, int status) 406 { 407 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 408 struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i); 409 int rc; 410 411 if (status) { 412 if (bio) { 413 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 414 } 415 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 416 return; 417 } 418 419 rc = spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr); 420 if (rc != 0) { 421 if (bio) { 422 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 423 } 424 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, rc); 425 return; 426 } 427 428 /* Recreate all of the I/O queue pairs */ 429 spdk_for_each_channel(nvme_bdev_ctrlr, 430 _bdev_nvme_reset_create_qpair, 431 bio, 432 _bdev_nvme_reset_create_qpairs_done); 433 } 434 435 static void 436 _bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) 437 { 438 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 439 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 440 int rc; 441 442 rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 443 if (!rc) { 444 nvme_ch->qpair = NULL; 445 } 446 447 spdk_for_each_channel_continue(i, rc); 448 } 449 450 static int 451 bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio, bool failover) 452 { 453 struct spdk_io_channel *ch; 454 struct nvme_io_channel *nvme_ch; 455 struct nvme_bdev_ctrlr_trid *curr_trid = NULL, *next_trid = 
NULL; 456 int rc = 0; 457 458 pthread_mutex_lock(&g_bdev_nvme_mutex); 459 if (nvme_bdev_ctrlr->destruct) { 460 pthread_mutex_unlock(&g_bdev_nvme_mutex); 461 /* Don't bother resetting if the controller is in the process of being destructed. */ 462 if (bio) { 463 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 464 } 465 return 0; 466 } 467 468 if (failover) { 469 curr_trid = TAILQ_FIRST(&nvme_bdev_ctrlr->trids); 470 assert(curr_trid); 471 assert(&curr_trid->trid == nvme_bdev_ctrlr->connected_trid); 472 next_trid = TAILQ_NEXT(curr_trid, link); 473 if (!next_trid) { 474 failover = false; 475 } 476 } 477 478 if (nvme_bdev_ctrlr->resetting) { 479 if (failover && !nvme_bdev_ctrlr->failover_in_progress) { 480 rc = -EAGAIN; 481 } 482 pthread_mutex_unlock(&g_bdev_nvme_mutex); 483 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n"); 484 /* 485 * The internal reset calls won't be queued. This is on purpose so that we don't 486 * interfere with the app framework reset strategy. i.e. we are deferring to the 487 * upper level. If they are in the middle of a reset, we won't try to schedule another one. 488 */ 489 if (bio) { 490 ch = spdk_get_io_channel(nvme_bdev_ctrlr); 491 assert(ch != NULL); 492 nvme_ch = spdk_io_channel_get_ctx(ch); 493 TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link); 494 spdk_put_io_channel(ch); 495 } 496 return rc; 497 } 498 499 nvme_bdev_ctrlr->resetting = true; 500 if (failover) { 501 nvme_bdev_ctrlr->failover_in_progress = true; 502 503 spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); 504 nvme_bdev_ctrlr->connected_trid = &next_trid->trid; 505 rc = spdk_nvme_ctrlr_set_trid(nvme_bdev_ctrlr->ctrlr, &next_trid->trid); 506 assert(rc == 0); 507 /** Shuffle the old trid to the end of the list and use the new one. 508 * Allows for round robin through multiple connections. 509 */ 510 TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, curr_trid, link); 511 TAILQ_INSERT_TAIL(&nvme_bdev_ctrlr->trids, curr_trid, link); 512 } 513 514 pthread_mutex_unlock(&g_bdev_nvme_mutex); 515 /* First, delete all NVMe I/O queue pairs. 
*/ 516 spdk_for_each_channel(nvme_bdev_ctrlr, 517 _bdev_nvme_reset_destroy_qpair, 518 bio, 519 _bdev_nvme_reset); 520 521 return 0; 522 } 523 524 static int 525 bdev_nvme_unmap(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch, 526 struct nvme_bdev_io *bio, 527 uint64_t offset_blocks, 528 uint64_t num_blocks); 529 530 static void 531 bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 532 bool success) 533 { 534 struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt; 535 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 536 int ret; 537 538 if (!success) { 539 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 540 return; 541 } 542 543 ret = bdev_nvme_readv(nbdev->nvme_ns, 544 nvme_ch, 545 (struct nvme_bdev_io *)bdev_io->driver_ctx, 546 bdev_io->u.bdev.iovs, 547 bdev_io->u.bdev.iovcnt, 548 bdev_io->u.bdev.md_buf, 549 bdev_io->u.bdev.num_blocks, 550 bdev_io->u.bdev.offset_blocks, 551 nbdev->disk.dif_check_flags); 552 553 if (spdk_likely(ret == 0)) { 554 return; 555 } else if (ret == -ENOMEM) { 556 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 557 } else { 558 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 559 } 560 } 561 562 static int 563 _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 564 { 565 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 566 struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt; 567 struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx; 568 struct nvme_bdev_io *nbdev_io_to_abort; 569 570 if (nvme_ch->qpair == NULL) { 571 /* The device is currently resetting */ 572 return -1; 573 } 574 575 switch (bdev_io->type) { 576 case SPDK_BDEV_IO_TYPE_READ: 577 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) { 578 bdev_nvme_get_buf_cb(ch, bdev_io, true); 579 } else { 580 spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb, 581 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 582 } 583 return 0; 584 585 case SPDK_BDEV_IO_TYPE_WRITE: 586 return bdev_nvme_writev(nbdev->nvme_ns, 587 nvme_ch, 588 nbdev_io, 589 bdev_io->u.bdev.iovs, 590 bdev_io->u.bdev.iovcnt, 591 bdev_io->u.bdev.md_buf, 592 bdev_io->u.bdev.num_blocks, 593 bdev_io->u.bdev.offset_blocks, 594 nbdev->disk.dif_check_flags); 595 596 case SPDK_BDEV_IO_TYPE_COMPARE: 597 return bdev_nvme_comparev(nbdev->nvme_ns, 598 nvme_ch, 599 nbdev_io, 600 bdev_io->u.bdev.iovs, 601 bdev_io->u.bdev.iovcnt, 602 bdev_io->u.bdev.md_buf, 603 bdev_io->u.bdev.num_blocks, 604 bdev_io->u.bdev.offset_blocks, 605 nbdev->disk.dif_check_flags); 606 607 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 608 return bdev_nvme_comparev_and_writev(nbdev->nvme_ns, 609 nvme_ch, 610 nbdev_io, 611 bdev_io->u.bdev.iovs, 612 bdev_io->u.bdev.iovcnt, 613 bdev_io->u.bdev.fused_iovs, 614 bdev_io->u.bdev.fused_iovcnt, 615 bdev_io->u.bdev.md_buf, 616 bdev_io->u.bdev.num_blocks, 617 bdev_io->u.bdev.offset_blocks, 618 nbdev->disk.dif_check_flags); 619 620 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 621 return bdev_nvme_unmap(nbdev->nvme_ns, 622 nvme_ch, 623 nbdev_io, 624 bdev_io->u.bdev.offset_blocks, 625 bdev_io->u.bdev.num_blocks); 626 627 case SPDK_BDEV_IO_TYPE_UNMAP: 628 return bdev_nvme_unmap(nbdev->nvme_ns, 629 nvme_ch, 630 nbdev_io, 631 bdev_io->u.bdev.offset_blocks, 632 bdev_io->u.bdev.num_blocks); 633 634 case SPDK_BDEV_IO_TYPE_RESET: 635 return bdev_nvme_reset(nbdev->nvme_ns->ctrlr, nbdev_io, false); 636 637 case SPDK_BDEV_IO_TYPE_FLUSH: 638 return bdev_nvme_flush(nbdev->nvme_ns, 
639 nbdev_io, 640 bdev_io->u.bdev.offset_blocks, 641 bdev_io->u.bdev.num_blocks); 642 643 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 644 return bdev_nvme_admin_passthru(nbdev->nvme_ns, 645 nvme_ch, 646 nbdev_io, 647 &bdev_io->u.nvme_passthru.cmd, 648 bdev_io->u.nvme_passthru.buf, 649 bdev_io->u.nvme_passthru.nbytes); 650 651 case SPDK_BDEV_IO_TYPE_NVME_IO: 652 return bdev_nvme_io_passthru(nbdev->nvme_ns, 653 nvme_ch, 654 nbdev_io, 655 &bdev_io->u.nvme_passthru.cmd, 656 bdev_io->u.nvme_passthru.buf, 657 bdev_io->u.nvme_passthru.nbytes); 658 659 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 660 return bdev_nvme_io_passthru_md(nbdev->nvme_ns, 661 nvme_ch, 662 nbdev_io, 663 &bdev_io->u.nvme_passthru.cmd, 664 bdev_io->u.nvme_passthru.buf, 665 bdev_io->u.nvme_passthru.nbytes, 666 bdev_io->u.nvme_passthru.md_buf, 667 bdev_io->u.nvme_passthru.md_len); 668 669 case SPDK_BDEV_IO_TYPE_ABORT: 670 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx; 671 return bdev_nvme_abort(nbdev->nvme_ns, 672 nvme_ch, 673 nbdev_io, 674 nbdev_io_to_abort); 675 676 default: 677 return -EINVAL; 678 } 679 return 0; 680 } 681 682 static void 683 bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 684 { 685 int rc = _bdev_nvme_submit_request(ch, bdev_io); 686 687 if (spdk_unlikely(rc != 0)) { 688 if (rc == -ENOMEM) { 689 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 690 } else { 691 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 692 } 693 } 694 } 695 696 static bool 697 bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 698 { 699 struct nvme_bdev *nbdev = ctx; 700 struct nvme_bdev_ns *nvme_ns = nbdev->nvme_ns; 701 const struct spdk_nvme_ctrlr_data *cdata; 702 703 switch (io_type) { 704 case SPDK_BDEV_IO_TYPE_READ: 705 case SPDK_BDEV_IO_TYPE_WRITE: 706 case SPDK_BDEV_IO_TYPE_RESET: 707 case SPDK_BDEV_IO_TYPE_FLUSH: 708 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 709 case SPDK_BDEV_IO_TYPE_NVME_IO: 710 case SPDK_BDEV_IO_TYPE_ABORT: 711 return true; 712 713 case SPDK_BDEV_IO_TYPE_COMPARE: 714 return spdk_nvme_ns_supports_compare(nvme_ns->ns); 715 716 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 717 return spdk_nvme_ns_get_md_size(nvme_ns->ns) ? true : false; 718 719 case SPDK_BDEV_IO_TYPE_UNMAP: 720 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr); 721 return cdata->oncs.dsm; 722 723 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 724 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr); 725 /* 726 * If an NVMe controller guarantees reading unallocated blocks returns zero, 727 * we can implement WRITE_ZEROES as an NVMe deallocate command. 728 */ 729 if (cdata->oncs.dsm && 730 spdk_nvme_ns_get_dealloc_logical_block_read_value(nvme_ns->ns) == 731 SPDK_NVME_DEALLOC_READ_00) { 732 return true; 733 } 734 /* 735 * The NVMe controller write_zeroes function is currently not used by our driver. 736 * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail. 737 * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read. 
738 */ 739 return false; 740 741 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 742 if (spdk_nvme_ctrlr_get_flags(nvme_ns->ctrlr->ctrlr) & 743 SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) { 744 return true; 745 } 746 return false; 747 748 default: 749 return false; 750 } 751 } 752 753 static int 754 bdev_nvme_create_cb(void *io_device, void *ctx_buf) 755 { 756 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 757 struct nvme_io_channel *ch = ctx_buf; 758 struct spdk_nvme_io_qpair_opts opts; 759 struct spdk_io_channel *pg_ch = NULL; 760 int rc; 761 762 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 763 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 764 opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests); 765 opts.create_only = true; 766 g_opts.io_queue_requests = opts.io_queue_requests; 767 768 ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 769 770 if (ch->qpair == NULL) { 771 return -1; 772 } 773 774 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 775 if (bdev_ocssd_create_io_channel(ch)) { 776 goto err; 777 } 778 } 779 780 pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs); 781 if (!pg_ch) { 782 goto err; 783 } 784 785 ch->group = spdk_io_channel_get_ctx(pg_ch); 786 if (spdk_nvme_poll_group_add(ch->group->group, ch->qpair) != 0) { 787 goto err; 788 } 789 790 rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, ch->qpair); 791 if (rc) { 792 spdk_nvme_poll_group_remove(ch->group->group, ch->qpair); 793 goto err; 794 } 795 796 #ifdef SPDK_CONFIG_VTUNE 797 ch->group->collect_spin_stat = true; 798 #else 799 ch->group->collect_spin_stat = false; 800 #endif 801 802 TAILQ_INIT(&ch->pending_resets); 803 return 0; 804 805 err: 806 if (pg_ch) { 807 spdk_put_io_channel(pg_ch); 808 } 809 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 810 return -1; 811 } 812 813 static void 814 bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) 815 { 816 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 817 struct nvme_io_channel *ch = ctx_buf; 818 struct nvme_bdev_poll_group *group; 819 820 group = ch->group; 821 assert(group != NULL); 822 823 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 824 bdev_ocssd_destroy_io_channel(ch); 825 } 826 827 if (ch->qpair != NULL) { 828 spdk_nvme_poll_group_remove(group->group, ch->qpair); 829 } 830 spdk_put_io_channel(spdk_io_channel_from_ctx(group)); 831 832 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 833 } 834 835 static int 836 bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf) 837 { 838 struct nvme_bdev_poll_group *group = ctx_buf; 839 840 group->group = spdk_nvme_poll_group_create(group); 841 if (group->group == NULL) { 842 return -1; 843 } 844 845 group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); 846 847 if (group->poller == NULL) { 848 spdk_nvme_poll_group_destroy(group->group); 849 return -1; 850 } 851 852 return 0; 853 } 854 855 static void 856 bdev_nvme_poll_group_destroy_cb(void *io_device, void *ctx_buf) 857 { 858 struct nvme_bdev_poll_group *group = ctx_buf; 859 860 spdk_poller_unregister(&group->poller); 861 if (spdk_nvme_poll_group_destroy(group->group)) { 862 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module."); 863 assert(false); 864 } 865 } 866 867 static struct spdk_io_channel * 868 bdev_nvme_get_io_channel(void *ctx) 869 { 870 struct nvme_bdev *nvme_bdev = ctx; 871 872 return spdk_get_io_channel(nvme_bdev->nvme_ns->ctrlr); 873 } 874 875 
static int 876 bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 877 { 878 struct nvme_bdev *nvme_bdev = ctx; 879 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_ns->ctrlr; 880 const struct spdk_nvme_ctrlr_data *cdata; 881 struct spdk_nvme_ns *ns; 882 union spdk_nvme_vs_register vs; 883 union spdk_nvme_csts_register csts; 884 char buf[128]; 885 886 cdata = spdk_nvme_ctrlr_get_data(nvme_bdev_ctrlr->ctrlr); 887 vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev_ctrlr->ctrlr); 888 csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev_ctrlr->ctrlr); 889 ns = nvme_bdev->nvme_ns->ns; 890 891 spdk_json_write_named_object_begin(w, "nvme"); 892 893 if (nvme_bdev_ctrlr->connected_trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 894 spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->connected_trid->traddr); 895 } 896 897 spdk_json_write_named_object_begin(w, "trid"); 898 899 nvme_bdev_dump_trid_json(nvme_bdev_ctrlr->connected_trid, w); 900 901 spdk_json_write_object_end(w); 902 903 #ifdef SPDK_CONFIG_NVME_CUSE 904 size_t cuse_name_size = 128; 905 char cuse_name[cuse_name_size]; 906 907 int rc = spdk_nvme_cuse_get_ns_name(nvme_bdev_ctrlr->ctrlr, spdk_nvme_ns_get_id(ns), 908 cuse_name, &cuse_name_size); 909 if (rc == 0) { 910 spdk_json_write_named_string(w, "cuse_device", cuse_name); 911 } 912 #endif 913 914 spdk_json_write_named_object_begin(w, "ctrlr_data"); 915 916 spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid); 917 918 snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn); 919 spdk_str_trim(buf); 920 spdk_json_write_named_string(w, "model_number", buf); 921 922 snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn); 923 spdk_str_trim(buf); 924 spdk_json_write_named_string(w, "serial_number", buf); 925 926 snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr); 927 spdk_str_trim(buf); 928 spdk_json_write_named_string(w, "firmware_revision", buf); 929 930 spdk_json_write_named_object_begin(w, "oacs"); 931 932 spdk_json_write_named_uint32(w, "security", cdata->oacs.security); 933 spdk_json_write_named_uint32(w, "format", cdata->oacs.format); 934 spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware); 935 spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage); 936 937 spdk_json_write_object_end(w); 938 939 spdk_json_write_object_end(w); 940 941 spdk_json_write_named_object_begin(w, "vs"); 942 943 spdk_json_write_name(w, "nvme_version"); 944 if (vs.bits.ter) { 945 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter); 946 } else { 947 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr); 948 } 949 950 spdk_json_write_object_end(w); 951 952 spdk_json_write_named_object_begin(w, "csts"); 953 954 spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy); 955 spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs); 956 957 spdk_json_write_object_end(w); 958 959 spdk_json_write_named_object_begin(w, "ns_data"); 960 961 spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns)); 962 963 spdk_json_write_object_end(w); 964 965 if (cdata->oacs.security) { 966 spdk_json_write_named_object_begin(w, "security"); 967 968 spdk_json_write_named_bool(w, "opal", nvme_bdev_ctrlr->opal_dev ? 
true : false); 969 970 spdk_json_write_object_end(w); 971 } 972 973 spdk_json_write_object_end(w); 974 975 return 0; 976 } 977 978 static void 979 bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 980 { 981 /* No config per bdev needed */ 982 } 983 984 static uint64_t 985 bdev_nvme_get_spin_time(struct spdk_io_channel *ch) 986 { 987 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 988 struct nvme_bdev_poll_group *group = nvme_ch->group; 989 uint64_t spin_time; 990 991 if (!group || !group->collect_spin_stat) { 992 return 0; 993 } 994 995 if (group->end_ticks != 0) { 996 group->spin_ticks += (group->end_ticks - group->start_ticks); 997 group->end_ticks = 0; 998 } 999 1000 spin_time = (group->spin_ticks * 1000000ULL) / spdk_get_ticks_hz(); 1001 group->start_ticks = 0; 1002 group->spin_ticks = 0; 1003 1004 return spin_time; 1005 } 1006 1007 static const struct spdk_bdev_fn_table nvmelib_fn_table = { 1008 .destruct = bdev_nvme_destruct, 1009 .submit_request = bdev_nvme_submit_request, 1010 .io_type_supported = bdev_nvme_io_type_supported, 1011 .get_io_channel = bdev_nvme_get_io_channel, 1012 .dump_info_json = bdev_nvme_dump_info_json, 1013 .write_config_json = bdev_nvme_write_config_json, 1014 .get_spin_time = bdev_nvme_get_spin_time, 1015 }; 1016 1017 static void 1018 nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 1019 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx) 1020 { 1021 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 1022 struct nvme_bdev *bdev; 1023 struct spdk_nvme_ns *ns; 1024 const struct spdk_uuid *uuid; 1025 const struct spdk_nvme_ctrlr_data *cdata; 1026 const struct spdk_nvme_ns_data *nsdata; 1027 int rc; 1028 1029 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 1030 1031 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id); 1032 if (!ns) { 1033 SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id); 1034 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -EINVAL); 1035 return; 1036 } 1037 1038 bdev = calloc(1, sizeof(*bdev)); 1039 if (!bdev) { 1040 SPDK_ERRLOG("bdev calloc() failed\n"); 1041 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 1042 return; 1043 } 1044 1045 nvme_ns->ns = ns; 1046 bdev->nvme_ns = nvme_ns; 1047 1048 bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns)); 1049 if (!bdev->disk.name) { 1050 free(bdev); 1051 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 1052 return; 1053 } 1054 bdev->disk.product_name = "NVMe disk"; 1055 1056 bdev->disk.write_cache = 0; 1057 if (cdata->vwc.present) { 1058 /* Enable if the Volatile Write Cache exists */ 1059 bdev->disk.write_cache = 1; 1060 } 1061 bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns); 1062 bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns); 1063 bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); 1064 1065 uuid = spdk_nvme_ns_get_uuid(ns); 1066 if (uuid != NULL) { 1067 bdev->disk.uuid = *uuid; 1068 } 1069 1070 nsdata = spdk_nvme_ns_get_data(ns); 1071 1072 bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns); 1073 if (bdev->disk.md_len != 0) { 1074 bdev->disk.md_interleave = nsdata->flbas.extended; 1075 bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns); 1076 if (bdev->disk.dif_type != SPDK_DIF_DISABLE) { 1077 bdev->disk.dif_is_head_of_md = nsdata->dps.md_start; 1078 bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags; 1079 } 1080 } 1081 1082 if (!bdev_nvme_io_type_supported(bdev, 
SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) { 1083 bdev->disk.acwu = 0; 1084 } else if (nsdata->nsfeat.ns_atomic_write_unit) { 1085 bdev->disk.acwu = nsdata->nacwu; 1086 } else { 1087 bdev->disk.acwu = cdata->acwu; 1088 } 1089 1090 bdev->disk.ctxt = bdev; 1091 bdev->disk.fn_table = &nvmelib_fn_table; 1092 bdev->disk.module = &nvme_if; 1093 rc = spdk_bdev_register(&bdev->disk); 1094 if (rc) { 1095 free(bdev->disk.name); 1096 free(bdev); 1097 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, rc); 1098 return; 1099 } 1100 1101 nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev); 1102 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0); 1103 } 1104 1105 static bool 1106 hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1107 struct spdk_nvme_ctrlr_opts *opts) 1108 { 1109 struct nvme_probe_skip_entry *entry; 1110 1111 TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) { 1112 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1113 return false; 1114 } 1115 } 1116 1117 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1118 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1119 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1120 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1121 1122 SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr); 1123 1124 return true; 1125 } 1126 1127 static void 1128 nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) 1129 { 1130 struct spdk_nvme_ctrlr *ctrlr = ctx; 1131 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1132 1133 if (spdk_nvme_cpl_is_error(cpl)) { 1134 SPDK_WARNLOG("Abort failed. Resetting controller.\n"); 1135 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1136 assert(nvme_bdev_ctrlr != NULL); 1137 bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); 1138 } 1139 } 1140 1141 static void 1142 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, 1143 struct spdk_nvme_qpair *qpair, uint16_t cid) 1144 { 1145 int rc; 1146 union spdk_nvme_csts_register csts; 1147 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1148 1149 SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); 1150 1151 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); 1152 if (csts.bits.cfs) { 1153 SPDK_ERRLOG("Controller Fatal Status, reset required\n"); 1154 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1155 assert(nvme_bdev_ctrlr != NULL); 1156 bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); 1157 return; 1158 } 1159 1160 switch (g_opts.action_on_timeout) { 1161 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 1162 if (qpair) { 1163 rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, 1164 nvme_abort_cpl, ctrlr); 1165 if (rc == 0) { 1166 return; 1167 } 1168 1169 SPDK_ERRLOG("Unable to send abort. 
Resetting.\n"); 1170 } 1171 1172 /* FALLTHROUGH */ 1173 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 1174 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1175 assert(nvme_bdev_ctrlr != NULL); 1176 bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); 1177 break; 1178 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 1179 SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n"); 1180 break; 1181 default: 1182 SPDK_ERRLOG("An invalid timeout action value is found.\n"); 1183 break; 1184 } 1185 } 1186 1187 void 1188 nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) 1189 { 1190 pthread_mutex_lock(&g_bdev_nvme_mutex); 1191 nvme_bdev_ctrlr->ref--; 1192 1193 if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) { 1194 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1195 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1196 return; 1197 } 1198 1199 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1200 } 1201 1202 static void 1203 nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns) 1204 { 1205 struct nvme_bdev *bdev, *tmp; 1206 1207 TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { 1208 spdk_bdev_unregister(&bdev->disk, NULL, NULL); 1209 } 1210 1211 ns->populated = false; 1212 1213 nvme_ctrlr_depopulate_namespace_done(ns->ctrlr); 1214 } 1215 1216 static void 1217 nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, 1218 struct nvme_async_probe_ctx *ctx) 1219 { 1220 g_populate_namespace_fn[ns->type](ctrlr, ns, ctx); 1221 } 1222 1223 static void 1224 nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns) 1225 { 1226 g_depopulate_namespace_fn[ns->type](ns); 1227 } 1228 1229 void 1230 nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx, 1231 struct nvme_bdev_ns *ns, int rc) 1232 { 1233 if (rc == 0) { 1234 ns->populated = true; 1235 pthread_mutex_lock(&g_bdev_nvme_mutex); 1236 ns->ctrlr->ref++; 1237 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1238 } else { 1239 memset(ns, 0, sizeof(*ns)); 1240 } 1241 1242 if (ctx) { 1243 ctx->populates_in_progress--; 1244 if (ctx->populates_in_progress == 0) { 1245 nvme_ctrlr_populate_namespaces_done(ctx); 1246 } 1247 } 1248 } 1249 1250 static void 1251 nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 1252 struct nvme_async_probe_ctx *ctx) 1253 { 1254 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 1255 struct nvme_bdev_ns *ns; 1256 struct spdk_nvme_ns *nvme_ns; 1257 struct nvme_bdev *bdev; 1258 uint32_t i; 1259 int rc; 1260 uint64_t num_sectors; 1261 bool ns_is_active; 1262 1263 if (ctx) { 1264 /* Initialize this count to 1 to handle the populate functions 1265 * calling nvme_ctrlr_populate_namespace_done() immediately. 
1266 */ 1267 ctx->populates_in_progress = 1; 1268 } 1269 1270 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1271 uint32_t nsid = i + 1; 1272 1273 ns = nvme_bdev_ctrlr->namespaces[i]; 1274 ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid); 1275 1276 if (ns->populated && ns_is_active && ns->type == NVME_BDEV_NS_STANDARD) { 1277 /* NS is still there but attributes may have changed */ 1278 nvme_ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 1279 num_sectors = spdk_nvme_ns_get_num_sectors(nvme_ns); 1280 bdev = TAILQ_FIRST(&ns->bdevs); 1281 if (bdev->disk.blockcnt != num_sectors) { 1282 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %lu, new size %lu\n", 1283 nsid, 1284 bdev->disk.name, 1285 bdev->disk.blockcnt, 1286 num_sectors); 1287 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors); 1288 if (rc != 0) { 1289 SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n", 1290 bdev->disk.name, rc); 1291 } 1292 } 1293 } 1294 1295 if (!ns->populated && ns_is_active) { 1296 ns->id = nsid; 1297 ns->ctrlr = nvme_bdev_ctrlr; 1298 if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { 1299 ns->type = NVME_BDEV_NS_OCSSD; 1300 } else { 1301 ns->type = NVME_BDEV_NS_STANDARD; 1302 } 1303 1304 TAILQ_INIT(&ns->bdevs); 1305 1306 if (ctx) { 1307 ctx->populates_in_progress++; 1308 } 1309 nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx); 1310 } 1311 1312 if (ns->populated && !ns_is_active) { 1313 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1314 } 1315 } 1316 1317 if (ctx) { 1318 /* Decrement this count now that the loop is over to account 1319 * for the one we started with. If the count is then 0, we 1320 * know any populate_namespace functions completed immediately, 1321 * so we'll kick the callback here. 1322 */ 1323 ctx->populates_in_progress--; 1324 if (ctx->populates_in_progress == 0) { 1325 nvme_ctrlr_populate_namespaces_done(ctx); 1326 } 1327 } 1328 1329 } 1330 1331 static void 1332 nvme_ctrlr_depopulate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) 1333 { 1334 uint32_t i; 1335 struct nvme_bdev_ns *ns; 1336 1337 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1338 uint32_t nsid = i + 1; 1339 1340 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1341 if (ns->populated) { 1342 assert(ns->id == nsid); 1343 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1344 } 1345 } 1346 } 1347 1348 static void 1349 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) 1350 { 1351 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg; 1352 union spdk_nvme_async_event_completion event; 1353 1354 if (spdk_nvme_cpl_is_error(cpl)) { 1355 SPDK_WARNLOG("AER request execute failed"); 1356 return; 1357 } 1358 1359 event.raw = cpl->cdw0; 1360 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 1361 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 1362 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1363 } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR) && 1364 (event.bits.log_page_identifier == SPDK_OCSSD_LOG_CHUNK_NOTIFICATION) && 1365 spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1366 bdev_ocssd_handle_chunk_notification(nvme_bdev_ctrlr); 1367 } 1368 } 1369 1370 static int 1371 nvme_bdev_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, 1372 const char *name, 1373 const struct spdk_nvme_transport_id *trid, 1374 uint32_t prchk_flags) 1375 { 1376 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1377 struct nvme_bdev_ctrlr_trid *trid_entry; 1378 uint32_t i; 1379 int rc; 1380 1381 nvme_bdev_ctrlr = 
calloc(1, sizeof(*nvme_bdev_ctrlr)); 1382 if (nvme_bdev_ctrlr == NULL) { 1383 SPDK_ERRLOG("Failed to allocate device struct\n"); 1384 return -ENOMEM; 1385 } 1386 1387 TAILQ_INIT(&nvme_bdev_ctrlr->trids); 1388 nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 1389 nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *)); 1390 if (!nvme_bdev_ctrlr->namespaces) { 1391 SPDK_ERRLOG("Failed to allocate block namespaces pointer\n"); 1392 rc = -ENOMEM; 1393 goto err_alloc_namespaces; 1394 } 1395 1396 trid_entry = calloc(1, sizeof(*trid_entry)); 1397 if (trid_entry == NULL) { 1398 SPDK_ERRLOG("Failed to allocate trid entry pointer\n"); 1399 rc = -ENOMEM; 1400 goto err_alloc_trid; 1401 } 1402 1403 trid_entry->trid = *trid; 1404 1405 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1406 nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns)); 1407 if (nvme_bdev_ctrlr->namespaces[i] == NULL) { 1408 SPDK_ERRLOG("Failed to allocate block namespace struct\n"); 1409 rc = -ENOMEM; 1410 goto err_alloc_namespace; 1411 } 1412 } 1413 1414 nvme_bdev_ctrlr->thread = spdk_get_thread(); 1415 nvme_bdev_ctrlr->adminq_timer_poller = NULL; 1416 nvme_bdev_ctrlr->ctrlr = ctrlr; 1417 nvme_bdev_ctrlr->ref = 0; 1418 nvme_bdev_ctrlr->connected_trid = &trid_entry->trid; 1419 nvme_bdev_ctrlr->name = strdup(name); 1420 if (nvme_bdev_ctrlr->name == NULL) { 1421 rc = -ENOMEM; 1422 goto err_alloc_name; 1423 } 1424 1425 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1426 rc = bdev_ocssd_init_ctrlr(nvme_bdev_ctrlr); 1427 if (spdk_unlikely(rc != 0)) { 1428 SPDK_ERRLOG("Unable to initialize OCSSD controller\n"); 1429 goto err_init_ocssd; 1430 } 1431 } 1432 1433 nvme_bdev_ctrlr->prchk_flags = prchk_flags; 1434 1435 spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb, 1436 sizeof(struct nvme_io_channel), 1437 name); 1438 1439 nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_bdev_ctrlr, 1440 g_opts.nvme_adminq_poll_period_us); 1441 1442 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq); 1443 1444 if (g_opts.timeout_us > 0) { 1445 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, 1446 timeout_cb, NULL); 1447 } 1448 1449 spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr); 1450 1451 if (spdk_nvme_ctrlr_get_flags(nvme_bdev_ctrlr->ctrlr) & 1452 SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { 1453 nvme_bdev_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_bdev_ctrlr->ctrlr); 1454 if (nvme_bdev_ctrlr->opal_dev == NULL) { 1455 SPDK_ERRLOG("Failed to initialize Opal\n"); 1456 } 1457 } 1458 1459 TAILQ_INSERT_HEAD(&nvme_bdev_ctrlr->trids, trid_entry, link); 1460 return 0; 1461 1462 err_init_ocssd: 1463 free(nvme_bdev_ctrlr->name); 1464 err_alloc_name: 1465 err_alloc_namespace: 1466 for (; i > 0; i--) { 1467 free(nvme_bdev_ctrlr->namespaces[i - 1]); 1468 } 1469 free(trid_entry); 1470 err_alloc_trid: 1471 free(nvme_bdev_ctrlr->namespaces); 1472 err_alloc_namespaces: 1473 free(nvme_bdev_ctrlr); 1474 return rc; 1475 } 1476 1477 static void 1478 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1479 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1480 { 1481 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1482 struct nvme_probe_ctx *ctx = cb_ctx; 1483 char *name = NULL; 1484 uint32_t prchk_flags = 0; 1485 size_t i; 1486 1487 if (ctx) { 1488 for (i = 0; i < ctx->count; i++) { 1489 if (spdk_nvme_transport_id_compare(trid, 
&ctx->trids[i]) == 0) { 1490 prchk_flags = ctx->prchk_flags[i]; 1491 name = strdup(ctx->names[i]); 1492 break; 1493 } 1494 } 1495 } else { 1496 name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); 1497 } 1498 if (!name) { 1499 SPDK_ERRLOG("Failed to assign name to NVMe device\n"); 1500 return; 1501 } 1502 1503 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name); 1504 1505 nvme_bdev_ctrlr_create(ctrlr, name, trid, prchk_flags); 1506 1507 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); 1508 if (!nvme_bdev_ctrlr) { 1509 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 1510 free(name); 1511 return; 1512 } 1513 1514 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1515 1516 free(name); 1517 } 1518 1519 static void 1520 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) 1521 { 1522 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1523 1524 pthread_mutex_lock(&g_bdev_nvme_mutex); 1525 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 1526 if (nvme_bdev_ctrlr->ctrlr == ctrlr) { 1527 /* The controller's destruction was already started */ 1528 if (nvme_bdev_ctrlr->destruct) { 1529 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1530 return; 1531 } 1532 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1533 1534 nvme_ctrlr_depopulate_namespaces(nvme_bdev_ctrlr); 1535 1536 pthread_mutex_lock(&g_bdev_nvme_mutex); 1537 nvme_bdev_ctrlr->destruct = true; 1538 if (nvme_bdev_ctrlr->ref == 0) { 1539 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1540 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1541 } else { 1542 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1543 } 1544 return; 1545 } 1546 } 1547 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1548 } 1549 1550 static int 1551 bdev_nvme_hotplug(void *arg) 1552 { 1553 struct spdk_nvme_transport_id trid_pcie; 1554 int done; 1555 1556 if (!g_hotplug_probe_ctx) { 1557 memset(&trid_pcie, 0, sizeof(trid_pcie)); 1558 spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE); 1559 1560 g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL, 1561 hotplug_probe_cb, 1562 attach_cb, remove_cb); 1563 if (!g_hotplug_probe_ctx) { 1564 return SPDK_POLLER_BUSY; 1565 } 1566 } 1567 1568 done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx); 1569 if (done != -EAGAIN) { 1570 g_hotplug_probe_ctx = NULL; 1571 } 1572 1573 return SPDK_POLLER_BUSY; 1574 } 1575 1576 void 1577 bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) 1578 { 1579 *opts = g_opts; 1580 } 1581 1582 int 1583 bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) 1584 { 1585 if (g_bdev_nvme_init_thread != NULL) { 1586 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 1587 return -EPERM; 1588 } 1589 } 1590 1591 g_opts = *opts; 1592 1593 return 0; 1594 } 1595 1596 struct set_nvme_hotplug_ctx { 1597 uint64_t period_us; 1598 bool enabled; 1599 spdk_msg_fn fn; 1600 void *fn_ctx; 1601 }; 1602 1603 static void 1604 set_nvme_hotplug_period_cb(void *_ctx) 1605 { 1606 struct set_nvme_hotplug_ctx *ctx = _ctx; 1607 1608 spdk_poller_unregister(&g_hotplug_poller); 1609 if (ctx->enabled) { 1610 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us); 1611 } 1612 1613 g_nvme_hotplug_poll_period_us = ctx->period_us; 1614 g_nvme_hotplug_enabled = ctx->enabled; 1615 if (ctx->fn) { 1616 ctx->fn(ctx->fn_ctx); 1617 } 1618 1619 free(ctx); 1620 } 1621 1622 int 1623 bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx) 1624 { 1625 struct set_nvme_hotplug_ctx *ctx; 1626 1627 if (enabled == true && !spdk_process_is_primary()) { 1628 return 
-EPERM; 1629 } 1630 1631 ctx = calloc(1, sizeof(*ctx)); 1632 if (ctx == NULL) { 1633 return -ENOMEM; 1634 } 1635 1636 period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; 1637 ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); 1638 ctx->enabled = enabled; 1639 ctx->fn = cb; 1640 ctx->fn_ctx = cb_ctx; 1641 1642 spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); 1643 return 0; 1644 } 1645 1646 static void 1647 populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc) 1648 { 1649 if (ctx->cb_fn) { 1650 ctx->cb_fn(ctx->cb_ctx, count, rc); 1651 } 1652 1653 free(ctx); 1654 } 1655 1656 static void 1657 nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx) 1658 { 1659 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1660 struct nvme_bdev_ns *ns; 1661 struct nvme_bdev *nvme_bdev, *tmp; 1662 uint32_t i, nsid; 1663 size_t j; 1664 1665 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(ctx->base_name); 1666 assert(nvme_bdev_ctrlr != NULL); 1667 1668 /* 1669 * Report the new bdevs that were created in this call. 1670 * There can be more than one bdev per NVMe controller. 1671 */ 1672 j = 0; 1673 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1674 nsid = i + 1; 1675 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1676 if (!ns->populated) { 1677 continue; 1678 } 1679 assert(ns->id == nsid); 1680 TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { 1681 if (j < ctx->count) { 1682 ctx->names[j] = nvme_bdev->disk.name; 1683 j++; 1684 } else { 1685 SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n", 1686 ctx->count); 1687 populate_namespaces_cb(ctx, 0, -ERANGE); 1688 return; 1689 } 1690 } 1691 } 1692 1693 populate_namespaces_cb(ctx, j, 0); 1694 } 1695 1696 static void 1697 connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1698 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1699 { 1700 struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx; 1701 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1702 struct nvme_async_probe_ctx *ctx; 1703 int rc; 1704 1705 ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); 1706 1707 spdk_poller_unregister(&ctx->poller); 1708 1709 rc = nvme_bdev_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags); 1710 if (rc) { 1711 SPDK_ERRLOG("Failed to create new device\n"); 1712 populate_namespaces_cb(ctx, 0, rc); 1713 return; 1714 } 1715 1716 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid); 1717 assert(nvme_bdev_ctrlr != NULL); 1718 1719 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, ctx); 1720 } 1721 1722 static int 1723 bdev_nvme_async_poll(void *arg) 1724 { 1725 struct nvme_async_probe_ctx *ctx = arg; 1726 int rc; 1727 1728 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx); 1729 if (spdk_unlikely(rc != -EAGAIN && rc != 0)) { 1730 spdk_poller_unregister(&ctx->poller); 1731 free(ctx); 1732 } 1733 1734 return SPDK_POLLER_BUSY; 1735 } 1736 1737 static int 1738 bdev_nvme_add_trid(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct spdk_nvme_transport_id *trid) 1739 { 1740 struct spdk_nvme_ctrlr *new_ctrlr; 1741 struct spdk_nvme_ctrlr_opts opts; 1742 uint32_t i; 1743 struct spdk_nvme_ns *ns, *new_ns; 1744 const struct spdk_nvme_ns_data *ns_data, *new_ns_data; 1745 struct nvme_bdev_ctrlr_trid *new_trid; 1746 int rc = 0; 1747 1748 assert(nvme_bdev_ctrlr != NULL); 1749 1750 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1751 SPDK_ERRLOG("PCIe failover is not 
supported.\n"); 1752 return -ENOTSUP; 1753 } 1754 1755 /* Currently we only support failover to the same transport type. */ 1756 if (nvme_bdev_ctrlr->connected_trid->trtype != trid->trtype) { 1757 return -EINVAL; 1758 } 1759 1760 /* Currently we only support failover to the same NQN. */ 1761 if (strncmp(trid->subnqn, nvme_bdev_ctrlr->connected_trid->subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 1762 return -EINVAL; 1763 } 1764 1765 /* Skip all the other checks if we've already registered this path. */ 1766 TAILQ_FOREACH(new_trid, &nvme_bdev_ctrlr->trids, link) { 1767 if (!spdk_nvme_transport_id_compare(&new_trid->trid, trid)) { 1768 return -EEXIST; 1769 } 1770 } 1771 1772 spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); 1773 opts.transport_retry_count = g_opts.retry_count; 1774 1775 new_ctrlr = spdk_nvme_connect(trid, &opts, sizeof(opts)); 1776 1777 if (new_ctrlr == NULL) { 1778 return -ENODEV; 1779 } 1780 1781 if (spdk_nvme_ctrlr_get_num_ns(new_ctrlr) != nvme_bdev_ctrlr->num_ns) { 1782 rc = -EINVAL; 1783 goto out; 1784 } 1785 1786 for (i = 1; i <= nvme_bdev_ctrlr->num_ns; i++) { 1787 ns = spdk_nvme_ctrlr_get_ns(nvme_bdev_ctrlr->ctrlr, i); 1788 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, i); 1789 assert(ns != NULL); 1790 assert(new_ns != NULL); 1791 1792 ns_data = spdk_nvme_ns_get_data(ns); 1793 new_ns_data = spdk_nvme_ns_get_data(new_ns); 1794 if (memcmp(ns_data->nguid, new_ns_data->nguid, sizeof(ns_data->nguid))) { 1795 rc = -EINVAL; 1796 goto out; 1797 } 1798 } 1799 1800 new_trid = calloc(1, sizeof(*new_trid)); 1801 if (new_trid == NULL) { 1802 rc = -ENOMEM; 1803 goto out; 1804 } 1805 new_trid->trid = *trid; 1806 TAILQ_INSERT_TAIL(&nvme_bdev_ctrlr->trids, new_trid, link); 1807 1808 out: 1809 spdk_nvme_detach(new_ctrlr); 1810 return rc; 1811 } 1812 1813 int 1814 bdev_nvme_remove_trid(const char *name, struct spdk_nvme_transport_id *trid) 1815 { 1816 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1817 struct nvme_bdev_ctrlr_trid *ctrlr_trid, *tmp_trid; 1818 1819 if (name == NULL) { 1820 return -EINVAL; 1821 } 1822 1823 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name); 1824 if (nvme_bdev_ctrlr == NULL) { 1825 SPDK_ERRLOG("Failed to find NVMe controller\n"); 1826 return -ENODEV; 1827 } 1828 1829 /* case 1: we are currently using the path to be removed. */ 1830 if (!spdk_nvme_transport_id_compare(trid, nvme_bdev_ctrlr->connected_trid)) { 1831 ctrlr_trid = TAILQ_FIRST(&nvme_bdev_ctrlr->trids); 1832 assert(nvme_bdev_ctrlr->connected_trid == &ctrlr_trid->trid); 1833 /* case 1A: the current path is the only path. */ 1834 if (!TAILQ_NEXT(ctrlr_trid, link)) { 1835 return bdev_nvme_delete(name); 1836 } 1837 1838 /* case 1B: there is an alternative path. */ 1839 if (bdev_nvme_reset(nvme_bdev_ctrlr, NULL, true) == -EAGAIN) { 1840 return -EAGAIN; 1841 } 1842 assert(nvme_bdev_ctrlr->connected_trid != &ctrlr_trid->trid); 1843 TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, ctrlr_trid, link); 1844 free(ctrlr_trid); 1845 return 0; 1846 } 1847 /* case 2: We are not using the specified path. */ 1848 TAILQ_FOREACH_SAFE(ctrlr_trid, &nvme_bdev_ctrlr->trids, link, tmp_trid) { 1849 if (!spdk_nvme_transport_id_compare(&ctrlr_trid->trid, trid)) { 1850 TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, ctrlr_trid, link); 1851 free(ctrlr_trid); 1852 return 0; 1853 } 1854 } 1855 1856 /* case 2A: The address isn't even in the registered list. 
int
bdev_nvme_remove_trid(const char *name, struct spdk_nvme_transport_id *trid)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_bdev_ctrlr_trid *ctrlr_trid, *tmp_trid;

	if (name == NULL) {
		return -EINVAL;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to find NVMe controller\n");
		return -ENODEV;
	}

	/* case 1: we are currently using the path to be removed. */
	if (!spdk_nvme_transport_id_compare(trid, nvme_bdev_ctrlr->connected_trid)) {
		ctrlr_trid = TAILQ_FIRST(&nvme_bdev_ctrlr->trids);
		assert(nvme_bdev_ctrlr->connected_trid == &ctrlr_trid->trid);
		/* case 1A: the current path is the only path. */
		if (!TAILQ_NEXT(ctrlr_trid, link)) {
			return bdev_nvme_delete(name);
		}

		/* case 1B: there is an alternative path. */
		if (bdev_nvme_reset(nvme_bdev_ctrlr, NULL, true) == -EAGAIN) {
			return -EAGAIN;
		}
		assert(nvme_bdev_ctrlr->connected_trid != &ctrlr_trid->trid);
		TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, ctrlr_trid, link);
		free(ctrlr_trid);
		return 0;
	}
	/* case 2: We are not using the specified path. */
	TAILQ_FOREACH_SAFE(ctrlr_trid, &nvme_bdev_ctrlr->trids, link, tmp_trid) {
		if (!spdk_nvme_transport_id_compare(&ctrlr_trid->trid, trid)) {
			TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, ctrlr_trid, link);
			free(ctrlr_trid);
			return 0;
		}
	}

	/* case 2A: The address isn't even in the registered list. */
	return -ENXIO;
}

int
bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_host_id *hostid,
		 const char *base_name,
		 const char **names,
		 uint32_t count,
		 const char *hostnqn,
		 uint32_t prchk_flags,
		 spdk_bdev_create_nvme_fn cb_fn,
		 void *cb_ctx)
{
	struct nvme_probe_skip_entry *entry, *tmp;
	struct nvme_async_probe_ctx *ctx;
	struct nvme_bdev_ctrlr *existing_ctrlr;
	int rc;

	/* TODO expand this check to include both the host and target TRIDs.
	 * Only if both are the same should we fail.
	 */
	if (nvme_bdev_ctrlr_get(trid) != NULL) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
		return -EEXIST;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}
	ctx->base_name = base_name;
	ctx->names = names;
	ctx->count = count;
	ctx->cb_fn = cb_fn;
	ctx->cb_ctx = cb_ctx;
	ctx->prchk_flags = prchk_flags;
	ctx->trid = *trid;

	existing_ctrlr = nvme_bdev_ctrlr_get_by_name(base_name);
	if (existing_ctrlr) {
		rc = bdev_nvme_add_trid(existing_ctrlr, trid);
		if (rc) {
			free(ctx);
			return rc;
		}

		nvme_ctrlr_populate_namespaces_done(ctx);
		return 0;
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
			if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
				TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
				free(entry);
				break;
			}
		}
	}

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts));
	ctx->opts.transport_retry_count = g_opts.retry_count;

	if (hostnqn) {
		snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn);
	}

	if (hostid->hostaddr[0] != '\0') {
		snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr);
	}

	if (hostid->hostsvcid[0] != '\0') {
		snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid);
	}

	ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb);
	if (ctx->probe_ctx == NULL) {
		SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
		free(ctx);
		return -ENODEV;
	}
	ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);

	return 0;
}

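/* Detach a controller by name. PCIe controllers are added to the skip list first so the
 * hotplug monitor does not immediately re-attach them after the user-requested delete.
 */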
int
bdev_nvme_delete(const char *name)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
	struct nvme_probe_skip_entry *entry;

	if (name == NULL) {
		return -EINVAL;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to find NVMe controller\n");
		return -ENODEV;
	}

	if (nvme_bdev_ctrlr->connected_trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		entry = calloc(1, sizeof(*entry));
		if (!entry) {
			return -ENOMEM;
		}
		entry->trid = *nvme_bdev_ctrlr->connected_trid;
		TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
	}

	remove_cb(NULL, nvme_bdev_ctrlr->ctrlr);
	return 0;
}

static int
bdev_nvme_library_init(void)
{
	g_bdev_nvme_init_thread = spdk_get_thread();

	spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_poll_group_create_cb,
				bdev_nvme_poll_group_destroy_cb,
				sizeof(struct nvme_bdev_poll_group), "bdev_nvme_poll_groups");

	return 0;
}

static void
bdev_nvme_library_fini(void)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp;
	struct nvme_probe_skip_entry *entry, *entry_tmp;

	spdk_poller_unregister(&g_hotplug_poller);
	free(g_hotplug_probe_ctx);

	TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
		TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
		free(entry);
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) {
		if (nvme_bdev_ctrlr->destruct) {
			/* This controller's destruction was already started
			 * before the application started shutting down
			 */
			continue;
		}

		pthread_mutex_unlock(&g_bdev_nvme_mutex);

		nvme_ctrlr_depopulate_namespaces(nvme_bdev_ctrlr);

		pthread_mutex_lock(&g_bdev_nvme_mutex);
		nvme_bdev_ctrlr->destruct = true;

		if (nvme_bdev_ctrlr->ref == 0) {
			pthread_mutex_unlock(&g_bdev_nvme_mutex);
			nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
			pthread_mutex_lock(&g_bdev_nvme_mutex);
		}
	}

	g_bdev_nvme_module_finish = true;
	if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
		spdk_bdev_module_finish_done();
		return;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

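/* Re-run protection information verification in software after the controller reported a
 * PI error, so the failing block type and offset can be logged. Handles both interleaved
 * metadata (DIF) and separate metadata buffers (DIX).
 */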
static void
bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return;
	}

	if (bdev->md_interleave) {
		rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		struct iovec md_iov = {
			.iov_base = bdev_io->u.bdev.md_buf,
			.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	} else {
		SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
	}
}

static void
bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	if (spdk_nvme_cpl_is_success(cpl)) {
		/* Run PI verification for read data buffer. */
		bdev_nvme_verify_pi_error(bdev_io);
	}

	/* Return original completion status */
	spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct,
					  bio->cpl.status.sc);
}

static void
bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
	struct nvme_io_channel *nvme_ch;
	int ret;

	if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
		SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);

		/* Save completion status to use after verifying PI error. */
		bio->cpl = *cpl;

		nvme_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));

		/* Read without PI checking to verify PI error. */
		ret = bdev_nvme_no_pi_readv(nbdev->nvme_ns,
					    nvme_ch,
					    bio,
					    bdev_io->u.bdev.iovs,
					    bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.md_buf,
					    bdev_io->u.bdev.num_blocks,
					    bdev_io->u.bdev.offset_blocks);
		if (ret == 0) {
			return;
		}
	}

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);

	if (spdk_nvme_cpl_is_pi_error(cpl)) {
		SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);
		/* Run PI verification for write data buffer if PI error is detected. */
		bdev_nvme_verify_pi_error(bdev_io);
	}

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);

	if (spdk_nvme_cpl_is_pi_error(cpl)) {
		SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);
		/* Run PI verification for compare data buffer if PI error is detected. */
		bdev_nvme_verify_pi_error(bdev_io);
	}

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	/* Compare operation completion */
	if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) {
		/* Save compare result for write callback */
		bio->cpl = *cpl;
		return;
	}

	/* Write operation completion */
	if (spdk_nvme_cpl_is_error(&bio->cpl)) {
		/* If bio->cpl is already an error, it means the compare operation failed. In that case,
		 * complete the IO with the compare operation's status.
		 */
		if (!spdk_nvme_cpl_is_error(cpl)) {
			SPDK_ERRLOG("Unexpected write success after compare failure.\n");
		}

		spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc);
	} else {
		spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
	}
}

static void
bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_admin_passthru_completion(void *ctx)
{
	struct nvme_bdev_io *bio = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	spdk_bdev_io_complete_nvme_status(bdev_io,
					  bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc);
}

static void
bdev_nvme_abort_completion(void *ctx)
{
	struct nvme_bdev_io *bio = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;

	bio->cpl = *cpl;
	spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio);
}

static void
bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;

	bio->cpl = *cpl;
	spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio);
}

static void
bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	bio->iov_offset = sgl_offset;
	for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
		iov = &bio->iovs[bio->iovpos];
		if (bio->iov_offset < iov->iov_len) {
			break;
		}

		bio->iov_offset -= iov->iov_len;
	}
}

static int
bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	assert(bio->iovpos < bio->iovcnt);

	iov = &bio->iovs[bio->iovpos];

	*address = iov->iov_base;
	*length = iov->iov_len;

	if (bio->iov_offset) {
		assert(bio->iov_offset <= iov->iov_len);
		*address += bio->iov_offset;
		*length -= bio->iov_offset;
	}

	bio->iov_offset += *length;
	if (bio->iov_offset == iov->iov_len) {
		bio->iovpos++;
		bio->iov_offset = 0;
	}

	return 0;
}

static void
bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	bio->fused_iov_offset = sgl_offset;
	for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
		iov = &bio->fused_iovs[bio->fused_iovpos];
		if (bio->fused_iov_offset < iov->iov_len) {
			break;
		}

		bio->fused_iov_offset -= iov->iov_len;
	}
}

static int
bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	assert(bio->fused_iovpos < bio->fused_iovcnt);

	iov = &bio->fused_iovs[bio->fused_iovpos];

	*address = iov->iov_base;
	*length = iov->iov_len;

	if (bio->fused_iov_offset) {
		assert(bio->fused_iov_offset <= iov->iov_len);
		*address += bio->fused_iov_offset;
		*length -= bio->fused_iov_offset;
	}

	bio->fused_iov_offset += *length;
	if (bio->fused_iov_offset == iov->iov_len) {
		bio->fused_iovpos++;
		bio->fused_iov_offset = 0;
	}

	return 0;
}

static int
bdev_nvme_no_pi_readv(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		      struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		      void *md, uint64_t lba_count, uint64_t lba)
{
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "read %lu blocks with offset %#lx without PI check\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	rc = spdk_nvme_ns_cmd_readv_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
					    bdev_nvme_no_pi_readv_done, bio, 0,
					    bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
					    md, 0, 0);

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
	}
	return rc;
}

static int
bdev_nvme_readv(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
{
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "read %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_read_with_md(nvme_ns->ns, nvme_ch->qpair, iov[0].iov_base, md, lba,
						   lba_count,
						   bdev_nvme_readv_done, bio,
						   flags,
						   0, 0);
	} else {
		rc = spdk_nvme_ns_cmd_readv_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
						    bdev_nvme_readv_done, bio, flags,
						    bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
						    md, 0, 0);
	}

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("readv failed: rc = %d\n", rc);
	}
	return rc;
}

static int
bdev_nvme_writev(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		 struct nvme_bdev_io *bio,
		 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		 uint32_t flags)
{
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "write %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_write_with_md(nvme_ns->ns, nvme_ch->qpair, iov[0].iov_base, md, lba,
						    lba_count,
						    bdev_nvme_writev_done, bio,
						    flags,
						    0, 0);
	} else {
		rc = spdk_nvme_ns_cmd_writev_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
						     bdev_nvme_writev_done, bio, flags,
						     bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
						     md, 0, 0);
	}

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("writev failed: rc = %d\n", rc);
	}
	return rc;
}

static int
bdev_nvme_comparev(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		   struct nvme_bdev_io *bio,
		   struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		   uint32_t flags)
{
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "compare %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	rc = spdk_nvme_ns_cmd_comparev_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
					       bdev_nvme_comparev_done, bio, flags,
					       bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
					       md, 0, 0);

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
	}
	return rc;
}

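/* Submit a fused compare-and-write: the compare is sent with FUSE_FIRST and the write with
 * FUSE_SECOND, and both complete through bdev_nvme_comparev_and_writev_done. On a retry of
 * the same bdev_io, first_fused_submitted prevents the compare from being submitted twice.
 */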
static int
bdev_nvme_comparev_and_writev(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
			      struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
			      struct iovec *write_iov, int write_iovcnt,
			      void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "compare and write %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = cmp_iov;
	bio->iovcnt = cmp_iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;
	bio->fused_iovs = write_iov;
	bio->fused_iovcnt = write_iovcnt;
	bio->fused_iovpos = 0;
	bio->fused_iov_offset = 0;

	if (bdev_io->num_retries == 0) {
		bio->first_fused_submitted = false;
	}

	if (!bio->first_fused_submitted) {
		flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
		memset(&bio->cpl, 0, sizeof(bio->cpl));

		rc = spdk_nvme_ns_cmd_comparev_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
						       bdev_nvme_comparev_and_writev_done, bio, flags,
						       bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
		if (rc == 0) {
			bio->first_fused_submitted = true;
			flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
		} else {
			if (rc != -ENOMEM) {
				SPDK_ERRLOG("compare failed: rc = %d\n", rc);
			}
			return rc;
		}
	}

	flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;

	rc = spdk_nvme_ns_cmd_writev_with_md(nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
					     bdev_nvme_comparev_and_writev_done, bio, flags,
					     bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		rc = 0;
	}

	return rc;
}

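/* Translate an unmap request into a Dataset Management (deallocate) command. The block
 * range is split into ranges of at most SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS, and
 * requests needing more than SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES ranges are rejected
 * with -EINVAL.
 */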
static int
bdev_nvme_unmap(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		struct nvme_bdev_io *bio,
		uint64_t offset_blocks,
		uint64_t num_blocks)
{
	struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
	struct spdk_nvme_dsm_range *range;
	uint64_t offset, remaining;
	uint64_t num_ranges_u64;
	uint16_t num_ranges;
	int rc;

	num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
			 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
	if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
		SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
		return -EINVAL;
	}
	num_ranges = (uint16_t)num_ranges_u64;

	offset = offset_blocks;
	remaining = num_blocks;
	range = &dsm_ranges[0];

	/* Fill max-size ranges until the remaining blocks fit into one range */
	while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
		range->attributes.raw = 0;
		range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		range->starting_lba = offset;

		offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		range++;
	}

	/* Final range describes the remaining blocks */
	range->attributes.raw = 0;
	range->length = remaining;
	range->starting_lba = offset;

	rc = spdk_nvme_ns_cmd_dataset_management(nvme_ns->ns, nvme_ch->qpair,
			SPDK_NVME_DSM_ATTR_DEALLOCATE,
			dsm_ranges, num_ranges,
			bdev_nvme_queued_done, bio);

	return rc;
}

static int
bdev_nvme_admin_passthru(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
			 struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ns->ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	bio->orig_thread = spdk_io_channel_get_thread(spdk_io_channel_from_ctx(nvme_ch));

	return spdk_nvme_ctrlr_cmd_admin_raw(nvme_ns->ctrlr->ctrlr, cmd, buf,
					     (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio);
}

static int
bdev_nvme_io_passthru(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		      struct nvme_bdev_io *bio,
		      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ns->ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(nvme_ns->ns);

	return spdk_nvme_ctrlr_cmd_io_raw(nvme_ns->ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
					  (uint32_t)nbytes, bdev_nvme_queued_done, bio);
}

static int
bdev_nvme_io_passthru_md(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
			 struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
{
	size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nvme_ns->ns);
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ns->ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nvme_ns->ns)) {
		SPDK_ERRLOG("invalid meta data buffer size\n");
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(nvme_ns->ns);

	return spdk_nvme_ctrlr_cmd_io_raw_with_md(nvme_ns->ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
			(uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
}

static void
bdev_nvme_abort_admin_cmd(void *ctx)
{
	struct nvme_bdev_io *bio = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev *nbdev;
	struct nvme_bdev_io *bio_to_abort;
	int rc;

	nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
	bio_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;

	rc = spdk_nvme_ctrlr_cmd_abort_ext(nbdev->nvme_ns->ctrlr->ctrlr,
					   NULL,
					   bio_to_abort,
					   bdev_nvme_abort_done, bio);
	if (rc == -ENOENT) {
		/* If no admin command was found in admin qpair, complete the abort
		 * request with failure.
		 */
		bio->cpl.cdw0 |= 1U;
		bio->cpl.status.sc = SPDK_NVME_SC_SUCCESS;
		bio->cpl.status.sct = SPDK_NVME_SCT_GENERIC;

		spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio);
	}
}

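/* Abort a queued I/O. The abort is first attempted against the I/O qpair; if the target
 * command is not found there (-ENOENT), it may be an admin command, so the request is
 * forwarded to the controller thread to try the admin qpair instead.
 */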
static int
bdev_nvme_abort(struct nvme_bdev_ns *nvme_ns, struct nvme_io_channel *nvme_ch,
		struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort)
{
	int rc;

	bio->orig_thread = spdk_io_channel_get_thread(spdk_io_channel_from_ctx(nvme_ch));

	rc = spdk_nvme_ctrlr_cmd_abort_ext(nvme_ns->ctrlr->ctrlr,
					   nvme_ch->qpair,
					   bio_to_abort,
					   bdev_nvme_abort_done, bio);
	if (rc == -ENOENT) {
		/* If no command was found in I/O qpair, the target command may be
		 * admin command. Only a single thread tries aborting admin command
		 * to clean I/O flow.
		 */
		spdk_thread_send_msg(nvme_ns->ctrlr->thread,
				     bdev_nvme_abort_admin_cmd, bio);
		rc = 0;
	}

	return rc;
}

static void
nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns)
{
	/* nop */
}

static void
nvme_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns)
{
	g_config_json_namespace_fn[ns->type](w, ns);
}

static int
bdev_nvme_config_json(struct spdk_json_write_ctx *w)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct spdk_nvme_transport_id *trid;
	const char *action;
	uint32_t nsid;

	if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
		action = "reset";
	} else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
		action = "abort";
	} else {
		action = "none";
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "action_on_timeout", action);
	spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
	spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count);
	spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
	spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
	spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
	spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
	spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
	spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
	spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
	spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		trid = nvme_bdev_ctrlr->connected_trid;

		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name);
		nvme_bdev_dump_trid_json(trid, w);
		spdk_json_write_named_bool(w, "prchk_reftag",
					   (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
		spdk_json_write_named_bool(w, "prchk_guard",
					   (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);

		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);

		for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) {
			if (!nvme_bdev_ctrlr->namespaces[nsid]->populated) {
				continue;
			}

			nvme_namespace_config_json(w, nvme_bdev_ctrlr->namespaces[nsid]);
		}
	}

	/* Dump as last parameter to give all NVMe bdevs chance to be constructed
	 * before enabling hotplug poller.
	 */
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
	spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	return 0;
}

struct spdk_nvme_ctrlr *
bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
{
	if (!bdev || bdev->module != &nvme_if) {
		return NULL;
	}

	return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ns->ctrlr->ctrlr;
}

SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)