/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "bdev_nvme.h"

#include "spdk/accel_engine.h"
#include "spdk/config.h"
#include "spdk/endian.h"
#include "spdk/bdev.h"
#include "spdk/json.h"
#include "spdk/likely.h"
#include "spdk/nvme.h"
#include "spdk/nvme_ocssd.h"
#include "spdk/nvme_zns.h"
#include "spdk/opal.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/bdev_module.h"
#include "spdk/log.h"

#define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
#define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)

static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);

struct nvme_bdev_io {
	/** array of iovecs to transfer. */
	struct iovec *iovs;

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/** array of iovecs for the fused (second) command to transfer. */
	struct iovec *fused_iovs;

	/** Number of iovecs in fused_iovs array. */
	int fused_iovcnt;

	/** Current iovec position for the fused command. */
	int fused_iovpos;

	/** Offset in current iovec of the fused command. */
	uint32_t fused_iov_offset;

	/** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
	struct spdk_nvme_cpl cpl;

	/** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
	struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;

	/** Originating thread */
	struct spdk_thread *orig_thread;

	/** Keeps track if first of fused commands was submitted */
	bool first_fused_submitted;

	/** Temporary pointer to zone report buffer */
	struct spdk_nvme_zns_zone_report *zone_report_buf;

	/** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
	uint64_t handled_zones;
};

struct nvme_probe_ctx {
	size_t count;
	struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
	struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS];
	const char *names[NVME_MAX_CONTROLLERS];
	uint32_t prchk_flags[NVME_MAX_CONTROLLERS];
	const char *hostnqn;
};

struct nvme_probe_skip_entry {
	struct spdk_nvme_transport_id trid;
	TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
};
/* All the controllers deleted by users via RPC are skipped by hotplug monitor */
static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
		g_skipped_nvme_ctrlrs);

static struct spdk_bdev_nvme_opts g_opts = {
	.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
	.timeout_us = 0,
	.timeout_admin_us = 0,
	.keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
	.retry_count = 4,
	.arbitration_burst = 0,
	.low_priority_weight = 0,
	.medium_priority_weight = 0,
	.high_priority_weight = 0,
	.nvme_adminq_poll_period_us = 10000ULL,
	.nvme_ioq_poll_period_us = 0,
	.io_queue_requests = 0,
	.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
};

#define NVME_HOTPLUG_POLL_PERIOD_MAX		10000000ULL
#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT	100000ULL

static int g_hot_insert_nvme_controller_index = 0;
static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
static bool g_nvme_hotplug_enabled = false;
static struct spdk_thread *g_bdev_nvme_init_thread;
static struct spdk_poller *g_hotplug_poller;
static struct spdk_poller *g_hotplug_probe_poller;
static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;

static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
		struct nvme_async_probe_ctx *ctx);
static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
		struct nvme_async_probe_ctx *ctx);
static int bdev_nvme_library_init(void);
static void bdev_nvme_library_fini(void);
static int bdev_nvme_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		uint32_t flags, struct spdk_bdev_ext_io_opts *ext_opts);
static int bdev_nvme_no_pi_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		uint32_t flags, struct spdk_bdev_ext_io_opts *ext_opts);
static int bdev_nvme_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct iovec *iov, int iovcnt, void *md, uint64_t lba_count,
		uint64_t zslba, uint32_t flags);
static int bdev_nvme_comparev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		uint32_t flags);
static int bdev_nvme_comparev_and_writev(struct spdk_nvme_ns *ns,
		struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
		int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
		uint32_t flags);
static int bdev_nvme_get_zone_info(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
		struct spdk_bdev_zone_info *info);
static int bdev_nvme_zone_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio, uint64_t zone_id,
		enum spdk_bdev_zone_action action);
static int bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
		struct nvme_bdev_io *bio,
		struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
static int bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
		struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
static int bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
static int bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove);
static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);

struct spdk_nvme_qpair *
bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
{
	struct nvme_ctrlr_channel *ctrlr_ch;

	assert(ctrlr_io_ch != NULL);

	ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);

	return ctrlr_ch->qpair;
}

static int
bdev_nvme_get_ctx_size(void)
{
	return sizeof(struct nvme_bdev_io);
}

static struct spdk_bdev_module nvme_if = {
	.name = "nvme",
	.async_fini = true,
	.module_init = bdev_nvme_library_init,
	.module_fini = bdev_nvme_library_fini,
	.config_json = bdev_nvme_config_json,
	.get_ctx_size = bdev_nvme_get_ctx_size,
};
SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)

struct nvme_ctrlrs g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs);
pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
bool g_bdev_nvme_module_finish;

struct nvme_ns *
nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
{
	assert(nsid > 0);
	assert(nsid <= nvme_ctrlr->num_ns);
	if (nsid == 0 || nsid > nvme_ctrlr->num_ns) {
		return NULL;
	}

	return nvme_ctrlr->namespaces[nsid - 1];
}

struct nvme_ns *
nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
{
	uint32_t i;

	for (i = 0; i < nvme_ctrlr->num_ns; i++) {
		if (nvme_ctrlr->namespaces[i] != NULL) {
			return nvme_ctrlr->namespaces[i];
		}
	}

	return NULL;
}

struct nvme_ns *
nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
{
	uint32_t i;

	if (ns == NULL) {
		return NULL;
	}

	/* ns->id is a 1's based value and we want to start at the next
	 * entry in this array, so we start at ns->id and don't subtract to
	 * convert to 0's based. */
	for (i = ns->id; i < nvme_ctrlr->num_ns; i++) {
		if (nvme_ctrlr->namespaces[i] != NULL) {
			return nvme_ctrlr->namespaces[i];
		}
	}

	return NULL;
}

static struct nvme_ctrlr *
nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
{
	struct nvme_ctrlr *nvme_ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(trid, nvme_ctrlr->connected_trid) == 0) {
			break;
		}
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	return nvme_ctrlr;
}

struct nvme_ctrlr *
nvme_ctrlr_get_by_name(const char *name)
{
	struct nvme_ctrlr *nvme_ctrlr;

	if (name == NULL) {
		return NULL;
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
		if (strcmp(name, nvme_ctrlr->name) == 0) {
			break;
		}
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	return nvme_ctrlr;
}

void
nvme_ctrlr_for_each(nvme_ctrlr_for_each_fn fn, void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
		fn(nvme_ctrlr, ctx);
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

void
nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
{
	const char *trtype_str;
	const char *adrfam_str;

	trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
	if (trtype_str) {
		spdk_json_write_named_string(w, "trtype", trtype_str);
	}

	adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
	if (adrfam_str) {
		spdk_json_write_named_string(w, "adrfam", adrfam_str);
	}

	if (trid->traddr[0] != '\0') {
		spdk_json_write_named_string(w, "traddr", trid->traddr);
	}

	if (trid->trsvcid[0] != '\0') {
		spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
	}

	if (trid->subnqn[0] != '\0') {
		spdk_json_write_named_string(w, "subnqn", trid->subnqn);
	}
}

static void
nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
{
	struct nvme_ctrlr_trid *trid, *tmp_trid;
	uint32_t i;

	free(nvme_ctrlr->copied_ana_desc);
	spdk_free(nvme_ctrlr->ana_log_page);

	if (nvme_ctrlr->opal_dev) {
		spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
		nvme_ctrlr->opal_dev = NULL;
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_REMOVE(&g_nvme_ctrlrs, nvme_ctrlr, tailq);
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	spdk_nvme_detach(nvme_ctrlr->ctrlr);
	spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
	free(nvme_ctrlr->name);
	for (i = 0; i < nvme_ctrlr->num_ns; i++) {
		free(nvme_ctrlr->namespaces[i]);
	}

	TAILQ_FOREACH_SAFE(trid, &nvme_ctrlr->trids, link, tmp_trid) {
		TAILQ_REMOVE(&nvme_ctrlr->trids, trid, link);
		free(trid);
	}

	pthread_mutex_destroy(&nvme_ctrlr->mutex);

	free(nvme_ctrlr->namespaces);
	free(nvme_ctrlr);
}
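
/*
 * Called by the spdk_io_channel layer once the controller's io_device has been
 * fully unregistered. Frees the controller and, if the module is finishing and
 * this was the last controller, completes module shutdown.
 */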
static void
nvme_ctrlr_unregister_cb(void *io_device)
{
	struct nvme_ctrlr *nvme_ctrlr = io_device;

	nvme_ctrlr_delete(nvme_ctrlr);

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_ctrlrs)) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		spdk_io_device_unregister(&g_nvme_ctrlrs, NULL);
		spdk_bdev_module_fini_done();
		return;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

static void
nvme_ctrlr_unregister(void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
}

static void
nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);

	assert(nvme_ctrlr->ref > 0);
	nvme_ctrlr->ref--;

	if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct ||
	    nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	nvme_ctrlr_unregister(nvme_ctrlr);
}

static int
bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *nbdev_ch = ctx_buf;
	struct nvme_bdev *nbdev = io_device;
	struct nvme_ns *nvme_ns;
	struct spdk_io_channel *ch;

	nvme_ns = nbdev->nvme_ns;

	ch = spdk_get_io_channel(nvme_ns->ctrlr);
	if (ch == NULL) {
		SPDK_ERRLOG("Failed to alloc io_channel.\n");
		return -ENOMEM;
	}

	nbdev_ch->ctrlr_ch = spdk_io_channel_get_ctx(ch);
	nbdev_ch->nvme_ns = nvme_ns;

	return 0;
}

static void
bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_bdev_channel *nbdev_ch = ctx_buf;
	struct spdk_io_channel *ch;

	ch = spdk_io_channel_from_ctx(nbdev_ch->ctrlr_ch);
	spdk_put_io_channel(ch);
}

static inline bool
bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch,
		struct spdk_nvme_ns **_ns, struct spdk_nvme_qpair **_qpair)
{
	if (spdk_unlikely(nbdev_ch->ctrlr_ch->qpair == NULL)) {
		/* The device is currently resetting. */
		return false;
	}

	*_ns = nbdev_ch->nvme_ns->ns;
	*_qpair = nbdev_ch->ctrlr_ch->qpair;
	return true;
}

static inline bool
bdev_nvme_find_admin_path(struct nvme_bdev_channel *nbdev_ch,
		struct nvme_ctrlr **_nvme_ctrlr)
{
	*_nvme_ctrlr = nbdev_ch->ctrlr_ch->ctrlr;
	return true;
}

static inline void
bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
		const struct spdk_nvme_cpl *cpl)
{
	spdk_bdev_io_complete_nvme_status(spdk_bdev_io_from_ctx(bio), cpl->cdw0,
			cpl->status.sct, cpl->status.sc);
}

static inline void
bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
{
	enum spdk_bdev_io_status io_status;

	if (rc == 0) {
		io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else if (rc == -ENOMEM) {
		io_status = SPDK_BDEV_IO_STATUS_NOMEM;
	} else {
		io_status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), io_status);
}

static void
bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
	int rc;

	SPDK_DEBUGLOG(bdev_nvme, "qpair %p is disconnected, attempting reconnect.\n", qpair);
	/*
	 * Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will
	 * reconnect a qpair and we will stop getting a callback for this one.
	 */
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
	if (rc != 0) {
		SPDK_DEBUGLOG(bdev_nvme, "Failed to reconnect to qpair %p, errno %d\n", qpair, -rc);
	}
}
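
/*
 * Poller for a per-thread poll group: processes completions for all I/O qpairs
 * in the group and, when spin statistics are enabled, accounts the time spent
 * polling without finding any completions.
 */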
static int
bdev_nvme_poll(void *arg)
{
	struct nvme_poll_group *group = arg;
	int64_t num_completions;

	if (group->collect_spin_stat && group->start_ticks == 0) {
		group->start_ticks = spdk_get_ticks();
	}

	num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
			bdev_nvme_disconnected_qpair_cb);
	if (group->collect_spin_stat) {
		if (num_completions > 0) {
			if (group->end_ticks != 0) {
				group->spin_ticks += (group->end_ticks - group->start_ticks);
				group->end_ticks = 0;
			}
			group->start_ticks = 0;
		} else {
			group->end_ticks = spdk_get_ticks();
		}
	}

	return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
bdev_nvme_poll_adminq(void *arg)
{
	int32_t rc;
	struct nvme_ctrlr *nvme_ctrlr = arg;

	assert(nvme_ctrlr != NULL);

	rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
	if (rc < 0) {
		bdev_nvme_failover(nvme_ctrlr, false);
	}

	return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
}

static void
_bdev_nvme_unregister_dev_cb(void *io_device)
{
	struct nvme_bdev *nvme_disk = io_device;

	free(nvme_disk->disk.name);
	free(nvme_disk);
}

static int
bdev_nvme_destruct(void *ctx)
{
	struct nvme_bdev *nvme_disk = ctx;
	struct nvme_ns *nvme_ns = nvme_disk->nvme_ns;

	pthread_mutex_lock(&nvme_ns->ctrlr->mutex);

	nvme_ns->bdev = NULL;

	assert(nvme_ns->id > 0);

	if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
		pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);

		nvme_ctrlr_release(nvme_ns->ctrlr);
		free(nvme_ns);
	} else {
		pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
	}

	spdk_io_device_unregister(nvme_disk, _bdev_nvme_unregister_dev_cb);

	return 0;
}

static int
bdev_nvme_flush(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio, uint64_t offset, uint64_t nbytes)
{
	bdev_nvme_io_complete(bio, 0);

	return 0;
}

static int
bdev_nvme_create_qpair(struct nvme_ctrlr_channel *ctrlr_ch)
{
	struct spdk_nvme_ctrlr *ctrlr = ctrlr_ch->ctrlr->ctrlr;
	struct spdk_nvme_io_qpair_opts opts;
	struct spdk_nvme_qpair *qpair;
	int rc;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	opts.delay_cmd_submit = g_opts.delay_cmd_submit;
	opts.create_only = true;
	opts.async_mode = true;
	opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
	g_opts.io_queue_requests = opts.io_queue_requests;

	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
	if (qpair == NULL) {
		return -1;
	}

	assert(ctrlr_ch->group != NULL);

	rc = spdk_nvme_poll_group_add(ctrlr_ch->group->group, qpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
		goto err;
	}

	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect I/O qpair.\n");
		goto err;
	}

	ctrlr_ch->qpair = qpair;

	return 0;

err:
	spdk_nvme_ctrlr_free_io_qpair(qpair);

	return rc;
}
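
/*
 * Free the channel's I/O qpair. Clearing ctrlr_ch->qpair also signals
 * bdev_nvme_find_io_path() that the controller is resetting, so no new I/O
 * is submitted on this channel until the qpair is recreated.
 */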
static void
bdev_nvme_destroy_qpair(struct nvme_ctrlr_channel *ctrlr_ch)
{
	if (ctrlr_ch->qpair != NULL) {
		spdk_nvme_ctrlr_free_io_qpair(ctrlr_ch->qpair);
		ctrlr_ch->qpair = NULL;
	}
}

static void
_bdev_nvme_check_pending_destruct(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct_after_reset) {
		assert(nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct);
		pthread_mutex_unlock(&nvme_ctrlr->mutex);

		spdk_thread_send_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister,
				nvme_ctrlr);
	} else {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
	}
}

static void
bdev_nvme_check_pending_destruct(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);

	_bdev_nvme_check_pending_destruct(nvme_ctrlr);
}

static void
_bdev_nvme_complete_pending_resets(struct nvme_ctrlr_channel *ctrlr_ch,
		enum spdk_bdev_io_status status)
{
	struct spdk_bdev_io *bdev_io;

	while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
		bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
		TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);
		spdk_bdev_io_complete(bdev_io, status);
	}
}

static void
bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);

	_bdev_nvme_complete_pending_resets(ctrlr_ch, SPDK_BDEV_IO_STATUS_SUCCESS);

	spdk_for_each_channel_continue(i, 0);
}

static void
bdev_nvme_abort_pending_resets(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);

	_bdev_nvme_complete_pending_resets(ctrlr_ch, SPDK_BDEV_IO_STATUS_FAILED);

	spdk_for_each_channel_continue(i, 0);
}

static void
bdev_nvme_reset_complete(struct nvme_ctrlr *nvme_ctrlr, int rc)
{
	struct nvme_ctrlr_trid *curr_trid;
	bdev_nvme_reset_cb reset_cb_fn = nvme_ctrlr->reset_cb_fn;
	void *reset_cb_arg = nvme_ctrlr->reset_cb_arg;

	nvme_ctrlr->reset_cb_fn = NULL;
	nvme_ctrlr->reset_cb_arg = NULL;

	if (rc) {
		SPDK_ERRLOG("Resetting controller failed.\n");
	} else {
		SPDK_NOTICELOG("Resetting controller successful.\n");
	}

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	nvme_ctrlr->resetting = false;
	nvme_ctrlr->failover_in_progress = false;

	curr_trid = TAILQ_FIRST(&nvme_ctrlr->trids);
	assert(curr_trid != NULL);
	assert(&curr_trid->trid == nvme_ctrlr->connected_trid);

	curr_trid->is_failed = rc != 0 ? true : false;

	if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct) {
		/* Destruct ctrlr after clearing pending resets. */
		nvme_ctrlr->destruct_after_reset = true;
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (reset_cb_fn) {
		reset_cb_fn(reset_cb_arg, rc);
	}

	/* Make sure we clear any pending resets before returning. */
	spdk_for_each_channel(nvme_ctrlr,
			rc == 0 ? bdev_nvme_complete_pending_resets :
			bdev_nvme_abort_pending_resets,
			NULL,
			bdev_nvme_check_pending_destruct);
}

static void
bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);

	bdev_nvme_reset_complete(nvme_ctrlr, status);
}

static void
bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
	int rc;

	rc = bdev_nvme_create_qpair(ctrlr_ch);

	spdk_for_each_channel_continue(i, rc);
}

static int
bdev_nvme_ctrlr_reset_poll(void *arg)
{
	struct nvme_ctrlr *nvme_ctrlr = arg;
	int rc;

	rc = spdk_nvme_ctrlr_reset_poll_async(nvme_ctrlr->reset_ctx);
	if (rc == -EAGAIN) {
		return SPDK_POLLER_BUSY;
	}

	spdk_poller_unregister(&nvme_ctrlr->reset_poller);
	if (rc == 0) {
		/* Recreate all of the I/O queue pairs */
		spdk_for_each_channel(nvme_ctrlr,
				bdev_nvme_reset_create_qpair,
				NULL,
				bdev_nvme_reset_create_qpairs_done);
	} else {
		bdev_nvme_reset_complete(nvme_ctrlr, rc);
	}
	return SPDK_POLLER_BUSY;
}

static void
bdev_nvme_reset_ctrlr(struct spdk_io_channel_iter *i, int status)
{
	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
	int rc;

	if (status) {
		rc = status;
		goto err;
	}

	rc = spdk_nvme_ctrlr_reset_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->reset_ctx);
	if (rc != 0) {
		SPDK_ERRLOG("Create controller reset context failed\n");
		goto err;
	}
	assert(nvme_ctrlr->reset_poller == NULL);
	nvme_ctrlr->reset_poller = SPDK_POLLER_REGISTER(bdev_nvme_ctrlr_reset_poll,
			nvme_ctrlr, 0);

	return;

err:
	bdev_nvme_reset_complete(nvme_ctrlr, rc);
}

static void
bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);

	bdev_nvme_destroy_qpair(ctrlr_ch);
	spdk_for_each_channel_continue(i, 0);
}

static int
bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return -ENXIO;
	}

	if (nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
		return -EBUSY;
	}

	nvme_ctrlr->resetting = true;
	pthread_mutex_unlock(&nvme_ctrlr->mutex);
	spdk_nvme_ctrlr_prepare_for_reset(nvme_ctrlr->ctrlr);

	/* First, delete all NVMe I/O queue pairs. */
	spdk_for_each_channel(nvme_ctrlr,
			bdev_nvme_reset_destroy_qpair,
			NULL,
			bdev_nvme_reset_ctrlr);

	return 0;
}
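
/*
 * Reset entry point used by the RPC layer: starts an asynchronous controller
 * reset and records cb_fn/cb_arg so they are invoked from bdev_nvme_reset_complete().
 */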
int
bdev_nvme_reset_rpc(struct nvme_ctrlr *nvme_ctrlr, bdev_nvme_reset_cb cb_fn, void *cb_arg)
{
	int rc;

	rc = bdev_nvme_reset(nvme_ctrlr);
	if (rc == 0) {
		nvme_ctrlr->reset_cb_fn = cb_fn;
		nvme_ctrlr->reset_cb_arg = cb_arg;
	}
	return rc;
}

static void
bdev_nvme_reset_io_complete(void *cb_arg, int rc)
{
	struct nvme_bdev_io *bio = cb_arg;

	bdev_nvme_io_complete(bio, rc);
}

static int
bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
{
	struct nvme_ctrlr_channel *ctrlr_ch = nbdev_ch->ctrlr_ch;
	struct spdk_bdev_io *bdev_io;
	int rc;

	rc = bdev_nvme_reset(ctrlr_ch->ctrlr);
	if (rc == 0) {
		assert(ctrlr_ch->ctrlr->reset_cb_fn == NULL);
		assert(ctrlr_ch->ctrlr->reset_cb_arg == NULL);
		ctrlr_ch->ctrlr->reset_cb_fn = bdev_nvme_reset_io_complete;
		ctrlr_ch->ctrlr->reset_cb_arg = bio;
	} else if (rc == -EBUSY) {
		/*
		 * Reset call is queued only if it is from the app framework. This is on purpose so that
		 * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
		 */
		bdev_io = spdk_bdev_io_from_ctx(bio);
		TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
	} else {
		return rc;
	}

	return 0;
}

static int
bdev_nvme_failover_start(struct nvme_ctrlr *nvme_ctrlr, bool remove)
{
	struct nvme_ctrlr_trid *curr_trid = NULL, *next_trid = NULL;
	int rc;

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		/* Don't bother resetting if the controller is in the process of being destructed. */
		return -ENXIO;
	}

	curr_trid = TAILQ_FIRST(&nvme_ctrlr->trids);
	assert(curr_trid);
	assert(&curr_trid->trid == nvme_ctrlr->connected_trid);
	next_trid = TAILQ_NEXT(curr_trid, link);

	if (nvme_ctrlr->resetting) {
		if (next_trid && !nvme_ctrlr->failover_in_progress) {
			rc = -EBUSY;
		} else {
			rc = -EALREADY;
		}
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
		return rc;
	}

	nvme_ctrlr->resetting = true;
	curr_trid->is_failed = true;

	if (next_trid) {
		assert(curr_trid->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);

		SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", curr_trid->trid.traddr,
				curr_trid->trid.trsvcid, next_trid->trid.traddr, next_trid->trid.trsvcid);

		nvme_ctrlr->failover_in_progress = true;
		spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
		nvme_ctrlr->connected_trid = &next_trid->trid;
		rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_trid->trid);
		assert(rc == 0);
		TAILQ_REMOVE(&nvme_ctrlr->trids, curr_trid, link);
		if (!remove) {
			/** Shuffle the old trid to the end of the list and use the new one.
			 * Allows for round robin through multiple connections.
			 */
			TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, curr_trid, link);
		} else {
			free(curr_trid);
		}
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);
	return 0;
}
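
/*
 * Switch the controller to the next configured transport ID (if any) and then
 * run the normal reset path: destroy all I/O qpairs, reset the controller and
 * recreate the qpairs against the newly selected path.
 */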
static int
bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove)
{
	int rc;

	rc = bdev_nvme_failover_start(nvme_ctrlr, remove);
	if (rc == 0) {
		/* First, delete all NVMe I/O queue pairs. */
		spdk_for_each_channel(nvme_ctrlr,
				bdev_nvme_reset_destroy_qpair,
				NULL,
				bdev_nvme_reset_ctrlr);
	} else if (rc != -EALREADY) {
		return rc;
	}

	return 0;
}

static int bdev_nvme_unmap(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		uint64_t offset_blocks,
		uint64_t num_blocks);

static int bdev_nvme_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		struct nvme_bdev_io *bio,
		uint64_t offset_blocks,
		uint64_t num_blocks);

static void
bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		bool success)
{
	struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_qpair *qpair;
	int ret;

	if (!success) {
		ret = -EINVAL;
		goto exit;
	}

	if (spdk_unlikely(!bdev_nvme_find_io_path(nbdev_ch, &ns, &qpair))) {
		ret = -ENXIO;
		goto exit;
	}

	ret = bdev_nvme_readv(ns, qpair, bio,
			bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			bdev_io->u.bdev.md_buf,
			bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
			bdev->dif_check_flags, bdev_io->internal.ext_opts);

exit:
	if (spdk_unlikely(ret != 0)) {
		bdev_nvme_io_complete(bio, ret);
	}
}

static void
bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
	struct nvme_bdev_io *nbdev_io_to_abort;
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_qpair *qpair;
	int rc = 0;

	if (spdk_unlikely(!bdev_nvme_find_io_path(nbdev_ch, &ns, &qpair))) {
		rc = -ENXIO;
		goto exit;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
			rc = bdev_nvme_readv(ns, qpair, nbdev_io,
					bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.md_buf,
					bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
					bdev->dif_check_flags, bdev_io->internal.ext_opts);
		} else {
			spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
					bdev_io->u.bdev.num_blocks * bdev->blocklen);
			rc = 0;
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		rc = bdev_nvme_writev(ns, qpair, nbdev_io,
				bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.md_buf,
				bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
				bdev->dif_check_flags, bdev_io->internal.ext_opts);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE:
		rc = bdev_nvme_comparev(ns, qpair, nbdev_io,
				bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.md_buf,
				bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
				bdev->dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		rc = bdev_nvme_comparev_and_writev(ns, qpair, nbdev_io,
				bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.fused_iovs, bdev_io->u.bdev.fused_iovcnt,
				bdev_io->u.bdev.md_buf,
				bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
				bdev->dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = bdev_nvme_unmap(ns, qpair, nbdev_io,
				bdev_io->u.bdev.offset_blocks,
				bdev_io->u.bdev.num_blocks);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = bdev_nvme_write_zeroes(ns, qpair, nbdev_io,
				bdev_io->u.bdev.offset_blocks,
				bdev_io->u.bdev.num_blocks);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		rc = bdev_nvme_reset_io(nbdev_ch, nbdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = bdev_nvme_flush(ns, qpair, nbdev_io,
				bdev_io->u.bdev.offset_blocks,
				bdev_io->u.bdev.num_blocks);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = bdev_nvme_zone_appendv(ns, qpair, nbdev_io,
				bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				bdev_io->u.bdev.md_buf,
				bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks,
				bdev->dif_check_flags);
		break;
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = bdev_nvme_get_zone_info(ns, qpair, nbdev_io,
				bdev_io->u.zone_mgmt.zone_id,
				bdev_io->u.zone_mgmt.num_zones,
				bdev_io->u.zone_mgmt.buf);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = bdev_nvme_zone_management(ns, qpair, nbdev_io,
				bdev_io->u.zone_mgmt.zone_id,
				bdev_io->u.zone_mgmt.zone_action);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
		rc = bdev_nvme_admin_passthru(nbdev_ch, nbdev_io,
				&bdev_io->u.nvme_passthru.cmd,
				bdev_io->u.nvme_passthru.buf,
				bdev_io->u.nvme_passthru.nbytes);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_IO:
		rc = bdev_nvme_io_passthru(ns, qpair, nbdev_io,
				&bdev_io->u.nvme_passthru.cmd,
				bdev_io->u.nvme_passthru.buf,
				bdev_io->u.nvme_passthru.nbytes);
		break;
	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		rc = bdev_nvme_io_passthru_md(ns, qpair, nbdev_io,
				&bdev_io->u.nvme_passthru.cmd,
				bdev_io->u.nvme_passthru.buf,
				bdev_io->u.nvme_passthru.nbytes,
				bdev_io->u.nvme_passthru.md_buf,
				bdev_io->u.nvme_passthru.md_len);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
		rc = bdev_nvme_abort(nbdev_ch, nbdev_io, nbdev_io_to_abort);
		break;
	default:
		rc = -EINVAL;
		break;
	}

exit:
	if (spdk_unlikely(rc != 0)) {
		bdev_nvme_io_complete(nbdev_io, rc);
	}
}

static bool
bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct nvme_bdev *nbdev = ctx;
	struct nvme_ns *nvme_ns;
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;

	nvme_ns = nbdev->nvme_ns;
	assert(nvme_ns != NULL);
	ns = nvme_ns->ns;
	ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
	case SPDK_BDEV_IO_TYPE_ABORT:
		return true;

	case SPDK_BDEV_IO_TYPE_COMPARE:
		return spdk_nvme_ns_supports_compare(ns);

	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return spdk_nvme_ns_get_md_size(ns) ? true : false;

	case SPDK_BDEV_IO_TYPE_UNMAP:
		cdata = spdk_nvme_ctrlr_get_data(ctrlr);
		return cdata->oncs.dsm;

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		cdata = spdk_nvme_ctrlr_get_data(ctrlr);
		return cdata->oncs.write_zeroes;

	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		if (spdk_nvme_ctrlr_get_flags(ctrlr) &
		    SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
			return true;
		}
		return false;

	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;

	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
		       spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;

	default:
		return false;
	}
}

static int
bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_ctrlr *nvme_ctrlr = io_device;
	struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
	struct spdk_io_channel *pg_ch;
	int rc;

	pg_ch = spdk_get_io_channel(&g_nvme_ctrlrs);
	if (!pg_ch) {
		return -1;
	}

	ctrlr_ch->group = spdk_io_channel_get_ctx(pg_ch);

#ifdef SPDK_CONFIG_VTUNE
	ctrlr_ch->group->collect_spin_stat = true;
#else
	ctrlr_ch->group->collect_spin_stat = false;
#endif

	TAILQ_INIT(&ctrlr_ch->pending_resets);

	ctrlr_ch->ctrlr = nvme_ctrlr;

	rc = bdev_nvme_create_qpair(ctrlr_ch);
	if (rc != 0) {
		goto err_qpair;
	}

	return 0;

err_qpair:
	spdk_put_io_channel(pg_ch);

	return rc;
}

static void
bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
{
	struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;

	assert(ctrlr_ch->group != NULL);

	bdev_nvme_destroy_qpair(ctrlr_ch);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ctrlr_ch->group));
}

static void
bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
		uint32_t iov_cnt, uint32_t seed,
		spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
{
	struct nvme_poll_group *group = ctx;
	int rc;

	assert(group->accel_channel != NULL);
	assert(cb_fn != NULL);

	rc = spdk_accel_submit_crc32cv(group->accel_channel, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
	if (rc) {
		/* For -ENOMEM and -EINVAL, spdk_accel_submit_crc32cv() does not call the
		 * user's cb_fn, so invoke it here. */
		if (rc == -ENOMEM || rc == -EINVAL) {
			cb_fn(cb_arg, rc);
		}
		SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
	}
}

static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
	.table_size = sizeof(struct spdk_nvme_accel_fn_table),
	.submit_accel_crc32c = bdev_nvme_submit_accel_crc32c,
};
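
/*
 * Per-thread poll group channel callbacks: create the NVMe poll group, take a
 * reference on an accel engine channel (used for CRC-32C offload) and register
 * the completion poller with the configured polling period.
 */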
static int
bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
{
	struct nvme_poll_group *group = ctx_buf;

	group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
	if (group->group == NULL) {
		return -1;
	}

	group->accel_channel = spdk_accel_engine_get_io_channel();
	if (!group->accel_channel) {
		spdk_nvme_poll_group_destroy(group->group);
		SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
				group);
		return -1;
	}

	group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);

	if (group->poller == NULL) {
		spdk_put_io_channel(group->accel_channel);
		spdk_nvme_poll_group_destroy(group->group);
		return -1;
	}

	return 0;
}

static void
bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
{
	struct nvme_poll_group *group = ctx_buf;

	if (group->accel_channel) {
		spdk_put_io_channel(group->accel_channel);
	}

	spdk_poller_unregister(&group->poller);
	if (spdk_nvme_poll_group_destroy(group->group)) {
		SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
		assert(false);
	}
}

static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	struct nvme_bdev *nvme_bdev = ctx;

	return spdk_get_io_channel(nvme_bdev);
}

static void *
bdev_nvme_get_module_ctx(void *ctx)
{
	struct nvme_bdev *nvme_bdev = ctx;

	if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if || !nvme_bdev->nvme_ns) {
		return NULL;
	}

	return nvme_bdev->nvme_ns->ns;
}

static const char *
_nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
{
	switch (ana_state) {
	case SPDK_NVME_ANA_OPTIMIZED_STATE:
		return "optimized";
	case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
		return "non_optimized";
	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
		return "inaccessible";
	case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
		return "persistent_loss";
	case SPDK_NVME_ANA_CHANGE_STATE:
		return "change";
	default:
		return NULL;
	}
}

static int
bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
{
	struct nvme_bdev *nbdev = ctx;
	struct spdk_memory_domain *domain;

	domain = spdk_nvme_ctrlr_get_memory_domain(nbdev->nvme_ns->ctrlr->ctrlr);

	if (domain) {
		if (array_size > 0 && domains) {
			domains[0] = domain;
		}
		return 1;
	}

	return 0;
}

static int
bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct nvme_bdev *nvme_bdev = ctx;
	struct nvme_ns *nvme_ns;
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_transport_id *trid;
	union spdk_nvme_vs_register vs;
	union spdk_nvme_csts_register csts;
	char buf[128];

	nvme_ns = nvme_bdev->nvme_ns;
	assert(nvme_ns != NULL);
	ns = nvme_ns->ns;
	ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
	trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
	vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
	csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);

	spdk_json_write_named_object_begin(w, "nvme");

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_named_string(w, "pci_address", trid->traddr);
	}

	spdk_json_write_named_object_begin(w, "trid");

	nvme_bdev_dump_trid_json(trid, w);

	spdk_json_write_object_end(w);

#ifdef SPDK_CONFIG_NVME_CUSE
	size_t cuse_name_size = 128;
	char cuse_name[cuse_name_size];

	int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
			cuse_name, &cuse_name_size);
	if (rc == 0) {
		spdk_json_write_named_string(w, "cuse_device", cuse_name);
	}
#endif

	spdk_json_write_named_object_begin(w, "ctrlr_data");

	spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);

	snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "model_number", buf);

	snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "serial_number", buf);

	snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "firmware_revision", buf);

	if (cdata->subnqn[0] != '\0') {
		spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
	}

	spdk_json_write_named_object_begin(w, "oacs");

	spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
	spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
	spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
	spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "vs");

	spdk_json_write_name(w, "nvme_version");
	if (vs.bits.ter) {
		spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
	} else {
		spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "csts");

	spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy);
	spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "ns_data");

	spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));

	if (cdata->cmic.ana_reporting) {
		spdk_json_write_named_string(w, "ana_state",
				_nvme_ana_state_str(nvme_ns->ana_state));
	}

	spdk_json_write_object_end(w);

	if (cdata->oacs.security) {
		spdk_json_write_named_object_begin(w, "security");

		spdk_json_write_named_bool(w, "opal", nvme_bdev->opal);

		spdk_json_write_object_end(w);
	}

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}

static uint64_t
bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
{
	struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_ctrlr_channel *ctrlr_ch = nbdev_ch->ctrlr_ch;
	struct nvme_poll_group *group = ctrlr_ch->group;
	uint64_t spin_time;

	if (!group || !group->collect_spin_stat) {
		return 0;
	}

	if (group->end_ticks != 0) {
		group->spin_ticks += (group->end_ticks - group->start_ticks);
		group->end_ticks = 0;
	}

	spin_time = (group->spin_ticks * 1000000ULL) / spdk_get_ticks_hz();
	group->start_ticks = 0;
	group->spin_ticks = 0;

	return spin_time;
}
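
/* Function table that the generic bdev layer uses to drive NVMe bdevs created by this module. */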
static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	.destruct = bdev_nvme_destruct,
	.submit_request = bdev_nvme_submit_request,
	.io_type_supported = bdev_nvme_io_type_supported,
	.get_io_channel = bdev_nvme_get_io_channel,
	.dump_info_json = bdev_nvme_dump_info_json,
	.write_config_json = bdev_nvme_write_config_json,
	.get_spin_time = bdev_nvme_get_spin_time,
	.get_module_ctx = bdev_nvme_get_module_ctx,
	.get_memory_domains = bdev_nvme_get_memory_domains,
};

typedef int (*bdev_nvme_parse_ana_log_page_cb)(
	const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);

static int
bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
		bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ana_group_descriptor *copied_desc;
	uint8_t *orig_desc;
	uint32_t i, desc_size, copy_len;
	int rc = 0;

	if (nvme_ctrlr->ana_log_page == NULL) {
		return -EINVAL;
	}

	copied_desc = nvme_ctrlr->copied_ana_desc;

	orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
	copy_len = nvme_ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);

	for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
		memcpy(copied_desc, orig_desc, copy_len);

		rc = cb_fn(copied_desc, cb_arg);
		if (rc != 0) {
			break;
		}

		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
			    copied_desc->num_of_nsid * sizeof(uint32_t);
		orig_desc += desc_size;
		copy_len -= desc_size;
	}

	return rc;
}

static int
nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
{
	struct nvme_ns *nvme_ns = cb_arg;
	uint32_t i;

	for (i = 0; i < desc->num_of_nsid; i++) {
		if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
			continue;
		}
		nvme_ns->ana_group_id = desc->ana_group_id;
		nvme_ns->ana_state = desc->ana_state;
		return 1;
	}

	return 0;
}

static int
nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
		struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
		uint32_t prchk_flags, void *ctx)
{
	const struct spdk_uuid *uuid;
	const uint8_t *nguid;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_ns_data *nsdata;
	enum spdk_nvme_csi csi;
	uint32_t atomic_bs, phys_bs, bs;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
	csi = spdk_nvme_ns_get_csi(ns);

	switch (csi) {
	case SPDK_NVME_CSI_NVM:
		disk->product_name = "NVMe disk";
		break;
	case SPDK_NVME_CSI_ZNS:
		disk->product_name = "NVMe ZNS disk";
		disk->zoned = true;
		disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
		disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
					     spdk_nvme_ns_get_extended_sector_size(ns);
		disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
		disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
		break;
	default:
		SPDK_ERRLOG("unsupported CSI: %u\n", csi);
		return -ENOTSUP;
	}

	disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
	if (!disk->name) {
		return -ENOMEM;
	}

	disk->write_cache = 0;
	if (cdata->vwc.present) {
		/* Enable if the Volatile Write Cache exists */
		disk->write_cache = 1;
	}
	if (cdata->oncs.write_zeroes) {
		disk->max_write_zeroes = UINT16_MAX + 1;
	}
	disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
	disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
	disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);

	nguid = spdk_nvme_ns_get_nguid(ns);
	if (!nguid) {
		uuid = spdk_nvme_ns_get_uuid(ns);
		if (uuid) {
			disk->uuid = *uuid;
		}
	} else {
		memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
	}

	nsdata = spdk_nvme_ns_get_data(ns);
	bs = spdk_nvme_ns_get_sector_size(ns);
	atomic_bs = bs;
	phys_bs = bs;
	if (nsdata->nabo == 0) {
		if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
			atomic_bs = bs * (1 + nsdata->nawupf);
		} else {
			atomic_bs = bs * (1 + cdata->awupf);
		}
	}
	if (nsdata->nsfeat.optperf) {
		phys_bs = bs * (1 + nsdata->npwg);
	}
	disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);

	disk->md_len = spdk_nvme_ns_get_md_size(ns);
	if (disk->md_len != 0) {
		disk->md_interleave = nsdata->flbas.extended;
		disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
		if (disk->dif_type != SPDK_DIF_DISABLE) {
			disk->dif_is_head_of_md = nsdata->dps.md_start;
			disk->dif_check_flags = prchk_flags;
		}
	}

	if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
	      SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
		disk->acwu = 0;
	} else if (nsdata->nsfeat.ns_atomic_write_unit) {
		disk->acwu = nsdata->nacwu;
	} else {
		disk->acwu = cdata->acwu;
	}

	disk->ctxt = ctx;
	disk->fn_table = &nvmelib_fn_table;
	disk->module = &nvme_if;

	return 0;
}

static int
nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
{
	struct nvme_bdev *bdev;
	int rc;

	bdev = calloc(1, sizeof(*bdev));
	if (!bdev) {
		SPDK_ERRLOG("bdev calloc() failed\n");
		return -ENOMEM;
	}

	bdev->nvme_ns = nvme_ns;
	bdev->opal = nvme_ctrlr->opal_dev != NULL;

	rc = nvme_disk_create(&bdev->disk, nvme_ctrlr->name, nvme_ctrlr->ctrlr,
			nvme_ns->ns, nvme_ctrlr->prchk_flags, bdev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to create NVMe disk\n");
		free(bdev);
		return rc;
	}

	spdk_io_device_register(bdev,
			bdev_nvme_create_bdev_channel_cb,
			bdev_nvme_destroy_bdev_channel_cb,
			sizeof(struct nvme_bdev_channel),
			bdev->disk.name);

	rc = spdk_bdev_register(&bdev->disk);
	if (rc != 0) {
		SPDK_ERRLOG("spdk_bdev_register() failed\n");
		spdk_io_device_unregister(bdev, NULL);
		free(bdev->disk.name);
		free(bdev);
		return rc;
	}

	nvme_ns->bdev = bdev;

	return 0;
}

static bool
bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
{
	const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
	const struct spdk_uuid *uuid1, *uuid2;

	nsdata1 = spdk_nvme_ns_get_data(ns1);
	nsdata2 = spdk_nvme_ns_get_data(ns2);
	uuid1 = spdk_nvme_ns_get_uuid(ns1);
	uuid2 = spdk_nvme_ns_get_uuid(ns2);

	return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
	       nsdata1->eui64 == nsdata2->eui64 &&
	       uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0;
}
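
/*
 * Probe filter for the hotplug monitor: controllers that were explicitly
 * deleted via RPC are skipped, and the module-level arbitration and priority
 * weight options are applied to controllers that will be attached.
 */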
{ 1837 struct nvme_probe_skip_entry *entry; 1838 1839 TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) { 1840 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1841 return false; 1842 } 1843 } 1844 1845 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1846 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1847 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1848 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1849 opts->disable_read_ana_log_page = true; 1850 1851 SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr); 1852 1853 return true; 1854 } 1855 1856 static void 1857 nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) 1858 { 1859 struct nvme_ctrlr *nvme_ctrlr = ctx; 1860 1861 if (spdk_nvme_cpl_is_error(cpl)) { 1862 SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc, 1863 cpl->status.sct); 1864 bdev_nvme_reset(nvme_ctrlr); 1865 } else if (cpl->cdw0 & 0x1) { 1866 SPDK_WARNLOG("Specified command could not be aborted.\n"); 1867 bdev_nvme_reset(nvme_ctrlr); 1868 } 1869 } 1870 1871 static void 1872 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, 1873 struct spdk_nvme_qpair *qpair, uint16_t cid) 1874 { 1875 struct nvme_ctrlr *nvme_ctrlr = cb_arg; 1876 union spdk_nvme_csts_register csts; 1877 int rc; 1878 1879 assert(nvme_ctrlr->ctrlr == ctrlr); 1880 1881 SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); 1882 1883 /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O 1884 * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we 1885 * would submit another fabrics cmd on the admin queue to read CSTS and check for its 1886 * completion recursively. 1887 */ 1888 if (nvme_ctrlr->connected_trid->trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) { 1889 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); 1890 if (csts.bits.cfs) { 1891 SPDK_ERRLOG("Controller Fatal Status, reset required\n"); 1892 bdev_nvme_reset(nvme_ctrlr); 1893 return; 1894 } 1895 } 1896 1897 switch (g_opts.action_on_timeout) { 1898 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 1899 if (qpair) { 1900 /* Don't send abort to ctrlr when reset is running. */ 1901 pthread_mutex_lock(&nvme_ctrlr->mutex); 1902 if (nvme_ctrlr->resetting) { 1903 pthread_mutex_unlock(&nvme_ctrlr->mutex); 1904 SPDK_NOTICELOG("Quit abort. Ctrlr is in the process of reseting.\n"); 1905 return; 1906 } 1907 pthread_mutex_unlock(&nvme_ctrlr->mutex); 1908 1909 rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, 1910 nvme_abort_cpl, nvme_ctrlr); 1911 if (rc == 0) { 1912 return; 1913 } 1914 1915 SPDK_ERRLOG("Unable to send abort. 
Resetting, rc is %d.\n", rc); 1916 } 1917 1918 /* FALLTHROUGH */ 1919 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 1920 bdev_nvme_reset(nvme_ctrlr); 1921 break; 1922 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 1923 SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n"); 1924 break; 1925 default: 1926 SPDK_ERRLOG("An invalid timeout action value is found.\n"); 1927 break; 1928 } 1929 } 1930 1931 static void 1932 nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc) 1933 { 1934 struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr; 1935 struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx; 1936 1937 if (rc == 0) { 1938 nvme_ns->probe_ctx = NULL; 1939 pthread_mutex_lock(&nvme_ctrlr->mutex); 1940 nvme_ctrlr->ref++; 1941 pthread_mutex_unlock(&nvme_ctrlr->mutex); 1942 } else { 1943 nvme_ctrlr->namespaces[nvme_ns->id - 1] = NULL; 1944 free(nvme_ns); 1945 } 1946 1947 if (ctx) { 1948 ctx->populates_in_progress--; 1949 if (ctx->populates_in_progress == 0) { 1950 nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx); 1951 } 1952 } 1953 } 1954 1955 static void 1956 nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) 1957 { 1958 struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; 1959 struct spdk_nvme_ns *ns; 1960 int rc = 0; 1961 1962 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id); 1963 if (!ns) { 1964 SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id); 1965 rc = -EINVAL; 1966 goto done; 1967 } 1968 1969 nvme_ns->ns = ns; 1970 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE; 1971 1972 if (nvme_ctrlr->ana_log_page != NULL) { 1973 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns); 1974 } 1975 1976 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns); 1977 1978 done: 1979 nvme_ctrlr_populate_namespace_done(nvme_ns, rc); 1980 } 1981 1982 static void 1983 nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns) 1984 { 1985 struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr; 1986 1987 assert(nvme_ctrlr != NULL); 1988 1989 pthread_mutex_lock(&nvme_ctrlr->mutex); 1990 1991 nvme_ctrlr->namespaces[nvme_ns->id - 1] = NULL; 1992 1993 if (nvme_ns->bdev != NULL) { 1994 pthread_mutex_unlock(&nvme_ctrlr->mutex); 1995 return; 1996 } 1997 1998 free(nvme_ns); 1999 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2000 2001 nvme_ctrlr_release(nvme_ctrlr); 2002 } 2003 2004 static void 2005 nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) 2006 { 2007 struct nvme_bdev *bdev; 2008 2009 bdev = nvme_ns->bdev; 2010 if (bdev != NULL) { 2011 spdk_bdev_unregister(&bdev->disk, NULL, NULL); 2012 } 2013 2014 nvme_ctrlr_depopulate_namespace_done(nvme_ns); 2015 } 2016 2017 static void 2018 nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, 2019 struct nvme_async_probe_ctx *ctx) 2020 { 2021 struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; 2022 struct nvme_ns *nvme_ns, *next; 2023 struct spdk_nvme_ns *ns; 2024 struct nvme_bdev *bdev; 2025 uint32_t nsid; 2026 int rc; 2027 uint64_t num_sectors; 2028 2029 if (ctx) { 2030 /* Initialize this count to 1 to handle the populate functions 2031 * calling nvme_ctrlr_populate_namespace_done() immediately. 2032 */ 2033 ctx->populates_in_progress = 1; 2034 } 2035 2036 /* First loop over our existing namespaces and see if they have been 2037 * removed. 
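 * A namespace that the controller no longer reports as active is depopulated and
 * its bdev unregistered; namespaces that are still active are checked for a size
 * change and resized if needed.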
*/ 2038 nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); 2039 while (nvme_ns != NULL) { 2040 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns); 2041 2042 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) { 2043 /* NS is still there but attributes may have changed */ 2044 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id); 2045 num_sectors = spdk_nvme_ns_get_num_sectors(ns); 2046 bdev = nvme_ns->bdev; 2047 assert(bdev != NULL); 2048 if (bdev->disk.blockcnt != num_sectors) { 2049 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n", 2050 nvme_ns->id, 2051 bdev->disk.name, 2052 bdev->disk.blockcnt, 2053 num_sectors); 2054 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors); 2055 if (rc != 0) { 2056 SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n", 2057 bdev->disk.name, rc); 2058 } 2059 } 2060 } else { 2061 /* Namespace was removed */ 2062 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns); 2063 } 2064 2065 nvme_ns = next; 2066 } 2067 2068 /* Loop through all of the namespaces at the nvme level and see if any of them are new */ 2069 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 2070 while (nsid != 0) { 2071 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid); 2072 2073 if (nvme_ns == NULL) { 2074 /* Found a new one */ 2075 nvme_ns = calloc(1, sizeof(struct nvme_ns)); 2076 if (nvme_ns == NULL) { 2077 SPDK_ERRLOG("Failed to allocate namespace\n"); 2078 /* This just fails to attach the namespace. It may work on a future attempt. */ 2079 continue; 2080 } 2081 2082 nvme_ctrlr->namespaces[nsid - 1] = nvme_ns; 2083 2084 nvme_ns->id = nsid; 2085 nvme_ns->ctrlr = nvme_ctrlr; 2086 2087 nvme_ns->bdev = NULL; 2088 2089 if (ctx) { 2090 ctx->populates_in_progress++; 2091 } 2092 nvme_ns->probe_ctx = ctx; 2093 2094 nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns); 2095 } 2096 2097 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid); 2098 } 2099 2100 if (ctx) { 2101 /* Decrement this count now that the loop is over to account 2102 * for the one we started with. If the count is then 0, we 2103 * know any populate_namespace functions completed immediately, 2104 * so we'll kick the callback here. 
2105 */ 2106 ctx->populates_in_progress--; 2107 if (ctx->populates_in_progress == 0) { 2108 nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx); 2109 } 2110 } 2111 2112 } 2113 2114 static void 2115 nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr) 2116 { 2117 uint32_t i; 2118 struct nvme_ns *nvme_ns; 2119 2120 for (i = 0; i < nvme_ctrlr->num_ns; i++) { 2121 uint32_t nsid = i + 1; 2122 2123 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid); 2124 if (nvme_ns != NULL) { 2125 assert(nvme_ns->id == nsid); 2126 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns); 2127 } 2128 } 2129 } 2130 2131 static bool 2132 nvme_ctrlr_acquire(struct nvme_ctrlr *nvme_ctrlr) 2133 { 2134 pthread_mutex_lock(&nvme_ctrlr->mutex); 2135 if (nvme_ctrlr->destruct || nvme_ctrlr->resetting) { 2136 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2137 return false; 2138 } 2139 nvme_ctrlr->ref++; 2140 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2141 return true; 2142 } 2143 2144 static int 2145 nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, 2146 void *cb_arg) 2147 { 2148 struct nvme_ctrlr *nvme_ctrlr = cb_arg; 2149 struct nvme_ns *nvme_ns; 2150 uint32_t i, nsid; 2151 2152 for (i = 0; i < desc->num_of_nsid; i++) { 2153 nsid = desc->nsid[i]; 2154 if (nsid == 0 || nsid > nvme_ctrlr->num_ns) { 2155 continue; 2156 } 2157 2158 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid); 2159 2160 assert(nvme_ns != NULL); 2161 if (nvme_ns == NULL) { 2162 /* Target told us that an inactive namespace had an ANA change */ 2163 continue; 2164 } 2165 2166 nvme_ns->ana_group_id = desc->ana_group_id; 2167 nvme_ns->ana_state = desc->ana_state; 2168 } 2169 2170 return 0; 2171 } 2172 2173 static void 2174 nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl) 2175 { 2176 struct nvme_ctrlr *nvme_ctrlr = ctx; 2177 2178 if (spdk_nvme_cpl_is_success(cpl)) { 2179 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states, 2180 nvme_ctrlr); 2181 } 2182 2183 nvme_ctrlr_release(nvme_ctrlr); 2184 } 2185 2186 static void 2187 nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr) 2188 { 2189 int rc; 2190 2191 if (nvme_ctrlr->ana_log_page == NULL) { 2192 return; 2193 } 2194 2195 if (!nvme_ctrlr_acquire(nvme_ctrlr)) { 2196 return; 2197 } 2198 2199 rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr, 2200 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 2201 SPDK_NVME_GLOBAL_NS_TAG, 2202 nvme_ctrlr->ana_log_page, 2203 nvme_ctrlr->ana_log_page_size, 0, 2204 nvme_ctrlr_read_ana_log_page_done, 2205 nvme_ctrlr); 2206 if (rc != 0) { 2207 nvme_ctrlr_release(nvme_ctrlr); 2208 } 2209 } 2210 2211 static void 2212 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) 2213 { 2214 struct nvme_ctrlr *nvme_ctrlr = arg; 2215 union spdk_nvme_async_event_completion event; 2216 2217 if (spdk_nvme_cpl_is_error(cpl)) { 2218 SPDK_WARNLOG("AER request execute failed"); 2219 return; 2220 } 2221 2222 event.raw = cpl->cdw0; 2223 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 2224 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 2225 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL); 2226 } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 2227 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) { 2228 nvme_ctrlr_read_ana_log_page(nvme_ctrlr); 2229 } 2230 } 2231 2232 static void 2233 populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc) 2234 { 2235 if (ctx->cb_fn) { 2236 ctx->cb_fn(ctx->cb_ctx, count, rc); 2237 
} 2238 2239 ctx->namespaces_populated = true; 2240 if (ctx->probe_done) { 2241 /* The probe was already completed, so we need to free the context 2242 * here. This can happen for cases like OCSSD, where we need to 2243 * send additional commands to the SSD after attach. 2244 */ 2245 free(ctx); 2246 } 2247 } 2248 2249 static void 2250 nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr, 2251 struct nvme_async_probe_ctx *ctx) 2252 { 2253 spdk_io_device_register(nvme_ctrlr, 2254 bdev_nvme_create_ctrlr_channel_cb, 2255 bdev_nvme_destroy_ctrlr_channel_cb, 2256 sizeof(struct nvme_ctrlr_channel), 2257 nvme_ctrlr->name); 2258 2259 nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx); 2260 } 2261 2262 static void 2263 nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl) 2264 { 2265 struct nvme_ctrlr *nvme_ctrlr = _ctx; 2266 struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx; 2267 2268 nvme_ctrlr->probe_ctx = NULL; 2269 2270 if (spdk_nvme_cpl_is_error(cpl)) { 2271 nvme_ctrlr_delete(nvme_ctrlr); 2272 2273 if (ctx != NULL) { 2274 populate_namespaces_cb(ctx, 0, -1); 2275 } 2276 return; 2277 } 2278 2279 nvme_ctrlr_create_done(nvme_ctrlr, ctx); 2280 } 2281 2282 static int 2283 nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr, 2284 struct nvme_async_probe_ctx *ctx) 2285 { 2286 struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; 2287 const struct spdk_nvme_ctrlr_data *cdata; 2288 uint32_t ana_log_page_size; 2289 2290 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 2291 2292 ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid * 2293 sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->nn * 2294 sizeof(uint32_t); 2295 2296 nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL, 2297 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 2298 if (nvme_ctrlr->ana_log_page == NULL) { 2299 SPDK_ERRLOG("could not allocate ANA log page buffer\n"); 2300 return -ENXIO; 2301 } 2302 2303 /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned. 2304 * Hence copy each descriptor to a temporary area when parsing it. 2305 * 2306 * Allocate a buffer whose size is as large as ANA log page buffer because 2307 * we do not know the size of a descriptor until actually reading it. 
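 * The worst-case size computed above is
 *   sizeof(struct spdk_nvme_ana_page)
 *     + cdata->nanagrpid * sizeof(struct spdk_nvme_ana_group_descriptor)
 *     + cdata->nn * sizeof(uint32_t)
 * i.e. every ANA group descriptor present and every namespace ID listed once.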
2308 */ 2309 nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size); 2310 if (nvme_ctrlr->copied_ana_desc == NULL) { 2311 SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n"); 2312 return -ENOMEM; 2313 } 2314 2315 nvme_ctrlr->ana_log_page_size = ana_log_page_size; 2316 2317 nvme_ctrlr->probe_ctx = ctx; 2318 2319 return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, 2320 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, 2321 SPDK_NVME_GLOBAL_NS_TAG, 2322 nvme_ctrlr->ana_log_page, 2323 nvme_ctrlr->ana_log_page_size, 0, 2324 nvme_ctrlr_init_ana_log_page_done, 2325 nvme_ctrlr); 2326 } 2327 2328 static int 2329 nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, 2330 const char *name, 2331 const struct spdk_nvme_transport_id *trid, 2332 uint32_t prchk_flags, 2333 struct nvme_async_probe_ctx *ctx) 2334 { 2335 struct nvme_ctrlr *nvme_ctrlr; 2336 struct nvme_ctrlr_trid *trid_entry; 2337 uint32_t num_ns; 2338 const struct spdk_nvme_ctrlr_data *cdata; 2339 int rc; 2340 2341 nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr)); 2342 if (nvme_ctrlr == NULL) { 2343 SPDK_ERRLOG("Failed to allocate device struct\n"); 2344 return -ENOMEM; 2345 } 2346 2347 rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL); 2348 if (rc != 0) { 2349 free(nvme_ctrlr); 2350 return rc; 2351 } 2352 2353 TAILQ_INIT(&nvme_ctrlr->trids); 2354 2355 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 2356 if (num_ns != 0) { 2357 nvme_ctrlr->namespaces = calloc(num_ns, sizeof(struct nvme_ns *)); 2358 if (!nvme_ctrlr->namespaces) { 2359 SPDK_ERRLOG("Failed to allocate block namespaces pointer\n"); 2360 rc = -ENOMEM; 2361 goto err; 2362 } 2363 2364 nvme_ctrlr->num_ns = num_ns; 2365 } 2366 2367 trid_entry = calloc(1, sizeof(*trid_entry)); 2368 if (trid_entry == NULL) { 2369 SPDK_ERRLOG("Failed to allocate trid entry pointer\n"); 2370 rc = -ENOMEM; 2371 goto err; 2372 } 2373 2374 trid_entry->trid = *trid; 2375 nvme_ctrlr->connected_trid = &trid_entry->trid; 2376 TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, trid_entry, link); 2377 2378 nvme_ctrlr->thread = spdk_get_thread(); 2379 nvme_ctrlr->ctrlr = ctrlr; 2380 nvme_ctrlr->ref = 1; 2381 nvme_ctrlr->name = strdup(name); 2382 if (nvme_ctrlr->name == NULL) { 2383 rc = -ENOMEM; 2384 goto err; 2385 } 2386 2387 if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { 2388 SPDK_ERRLOG("OCSSDs are not supported"); 2389 rc = -ENOTSUP; 2390 goto err; 2391 } 2392 2393 nvme_ctrlr->prchk_flags = prchk_flags; 2394 2395 nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr, 2396 g_opts.nvme_adminq_poll_period_us); 2397 2398 pthread_mutex_lock(&g_bdev_nvme_mutex); 2399 TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq); 2400 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2401 2402 if (g_opts.timeout_us > 0) { 2403 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */ 2404 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */ 2405 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ? 
2406 g_opts.timeout_us : g_opts.timeout_admin_us; 2407 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, 2408 adm_timeout_us, timeout_cb, nvme_ctrlr); 2409 } 2410 2411 spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr); 2412 spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr); 2413 2414 if (spdk_nvme_ctrlr_get_flags(ctrlr) & 2415 SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { 2416 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr); 2417 } 2418 2419 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 2420 2421 if (cdata->cmic.ana_reporting) { 2422 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx); 2423 if (rc == 0) { 2424 return 0; 2425 } 2426 } else { 2427 nvme_ctrlr_create_done(nvme_ctrlr, ctx); 2428 return 0; 2429 } 2430 2431 err: 2432 nvme_ctrlr_delete(nvme_ctrlr); 2433 return rc; 2434 } 2435 2436 static void 2437 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 2438 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 2439 { 2440 struct nvme_probe_ctx *ctx = cb_ctx; 2441 char *name = NULL; 2442 uint32_t prchk_flags = 0; 2443 size_t i; 2444 2445 if (ctx) { 2446 for (i = 0; i < ctx->count; i++) { 2447 if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { 2448 prchk_flags = ctx->prchk_flags[i]; 2449 name = strdup(ctx->names[i]); 2450 break; 2451 } 2452 } 2453 } else { 2454 name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); 2455 } 2456 if (!name) { 2457 SPDK_ERRLOG("Failed to assign name to NVMe device\n"); 2458 return; 2459 } 2460 2461 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name); 2462 2463 nvme_ctrlr_create(ctrlr, name, trid, prchk_flags, NULL); 2464 2465 free(name); 2466 } 2467 2468 static void 2469 _nvme_ctrlr_destruct(void *ctx) 2470 { 2471 struct nvme_ctrlr *nvme_ctrlr = ctx; 2472 2473 nvme_ctrlr_depopulate_namespaces(nvme_ctrlr); 2474 nvme_ctrlr_release(nvme_ctrlr); 2475 } 2476 2477 static int 2478 _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, bool hotplug) 2479 { 2480 struct nvme_probe_skip_entry *entry; 2481 2482 pthread_mutex_lock(&nvme_ctrlr->mutex); 2483 2484 /* The controller's destruction was already started */ 2485 if (nvme_ctrlr->destruct) { 2486 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2487 return 0; 2488 } 2489 2490 if (!hotplug && 2491 nvme_ctrlr->connected_trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 2492 entry = calloc(1, sizeof(*entry)); 2493 if (!entry) { 2494 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2495 return -ENOMEM; 2496 } 2497 entry->trid = *nvme_ctrlr->connected_trid; 2498 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq); 2499 } 2500 2501 nvme_ctrlr->destruct = true; 2502 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2503 2504 _nvme_ctrlr_destruct(nvme_ctrlr); 2505 2506 return 0; 2507 } 2508 2509 static void 2510 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) 2511 { 2512 struct nvme_ctrlr *nvme_ctrlr = cb_ctx; 2513 2514 _bdev_nvme_delete(nvme_ctrlr, true); 2515 } 2516 2517 static int 2518 bdev_nvme_hotplug_probe(void *arg) 2519 { 2520 if (g_hotplug_probe_ctx == NULL) { 2521 spdk_poller_unregister(&g_hotplug_probe_poller); 2522 return SPDK_POLLER_IDLE; 2523 } 2524 2525 if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) { 2526 g_hotplug_probe_ctx = NULL; 2527 spdk_poller_unregister(&g_hotplug_probe_poller); 2528 } 2529 2530 return SPDK_POLLER_BUSY; 2531 } 2532 2533 static int 2534 bdev_nvme_hotplug(void *arg) 2535 { 2536 struct spdk_nvme_transport_id trid_pcie; 2537 2538 if (g_hotplug_probe_ctx) { 
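        /* An async probe from a previous poll period is still in flight;
         * bdev_nvme_hotplug_probe() will finish it, so do not start another.
         */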
2539 return SPDK_POLLER_BUSY; 2540 } 2541 2542 memset(&trid_pcie, 0, sizeof(trid_pcie)); 2543 spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE); 2544 2545 g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL, 2546 hotplug_probe_cb, attach_cb, NULL); 2547 2548 if (g_hotplug_probe_ctx) { 2549 assert(g_hotplug_probe_poller == NULL); 2550 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000); 2551 } 2552 2553 return SPDK_POLLER_BUSY; 2554 } 2555 2556 void 2557 bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) 2558 { 2559 *opts = g_opts; 2560 } 2561 2562 static int 2563 bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts) 2564 { 2565 if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) { 2566 /* Can't set timeout_admin_us without also setting timeout_us */ 2567 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n"); 2568 return -EINVAL; 2569 } 2570 2571 return 0; 2572 } 2573 2574 int 2575 bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) 2576 { 2577 int ret = bdev_nvme_validate_opts(opts); 2578 if (ret) { 2579 SPDK_WARNLOG("Failed to set nvme opts.\n"); 2580 return ret; 2581 } 2582 2583 if (g_bdev_nvme_init_thread != NULL) { 2584 if (!TAILQ_EMPTY(&g_nvme_ctrlrs)) { 2585 return -EPERM; 2586 } 2587 } 2588 2589 g_opts = *opts; 2590 2591 return 0; 2592 } 2593 2594 struct set_nvme_hotplug_ctx { 2595 uint64_t period_us; 2596 bool enabled; 2597 spdk_msg_fn fn; 2598 void *fn_ctx; 2599 }; 2600 2601 static void 2602 set_nvme_hotplug_period_cb(void *_ctx) 2603 { 2604 struct set_nvme_hotplug_ctx *ctx = _ctx; 2605 2606 spdk_poller_unregister(&g_hotplug_poller); 2607 if (ctx->enabled) { 2608 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us); 2609 } 2610 2611 g_nvme_hotplug_poll_period_us = ctx->period_us; 2612 g_nvme_hotplug_enabled = ctx->enabled; 2613 if (ctx->fn) { 2614 ctx->fn(ctx->fn_ctx); 2615 } 2616 2617 free(ctx); 2618 } 2619 2620 int 2621 bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx) 2622 { 2623 struct set_nvme_hotplug_ctx *ctx; 2624 2625 if (enabled == true && !spdk_process_is_primary()) { 2626 return -EPERM; 2627 } 2628 2629 ctx = calloc(1, sizeof(*ctx)); 2630 if (ctx == NULL) { 2631 return -ENOMEM; 2632 } 2633 2634 period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; 2635 ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); 2636 ctx->enabled = enabled; 2637 ctx->fn = cb; 2638 ctx->fn_ctx = cb_ctx; 2639 2640 spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); 2641 return 0; 2642 } 2643 2644 static void 2645 nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr, 2646 struct nvme_async_probe_ctx *ctx) 2647 { 2648 struct nvme_ns *nvme_ns; 2649 struct nvme_bdev *nvme_bdev; 2650 size_t j; 2651 2652 assert(nvme_ctrlr != NULL); 2653 2654 /* 2655 * Report the new bdevs that were created in this call. 2656 * There can be more than one bdev per NVMe controller. 2657 */ 2658 j = 0; 2659 nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); 2660 while (nvme_ns != NULL) { 2661 nvme_bdev = nvme_ns->bdev; 2662 if (j < ctx->count) { 2663 ctx->names[j] = nvme_bdev->disk.name; 2664 j++; 2665 } else { 2666 SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. 
Unable to return all names of created bdevs\n", 2667 ctx->count); 2668 populate_namespaces_cb(ctx, 0, -ERANGE); 2669 return; 2670 } 2671 2672 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns); 2673 } 2674 2675 populate_namespaces_cb(ctx, j, 0); 2676 } 2677 2678 static int 2679 bdev_nvme_compare_trids(struct nvme_ctrlr *nvme_ctrlr, 2680 struct spdk_nvme_ctrlr *new_ctrlr, 2681 struct spdk_nvme_transport_id *trid) 2682 { 2683 struct nvme_ctrlr_trid *tmp_trid; 2684 2685 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 2686 SPDK_ERRLOG("PCIe failover is not supported.\n"); 2687 return -ENOTSUP; 2688 } 2689 2690 /* Currently we only support failover to the same transport type. */ 2691 if (nvme_ctrlr->connected_trid->trtype != trid->trtype) { 2692 return -EINVAL; 2693 } 2694 2695 /* Currently we only support failover to the same NQN. */ 2696 if (strncmp(trid->subnqn, nvme_ctrlr->connected_trid->subnqn, SPDK_NVMF_NQN_MAX_LEN)) { 2697 return -EINVAL; 2698 } 2699 2700 /* Skip all the other checks if we've already registered this path. */ 2701 TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) { 2702 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) { 2703 return -EEXIST; 2704 } 2705 } 2706 2707 return 0; 2708 } 2709 2710 static int 2711 bdev_nvme_compare_namespaces(struct nvme_ctrlr *nvme_ctrlr, 2712 struct spdk_nvme_ctrlr *new_ctrlr) 2713 { 2714 struct nvme_ns *nvme_ns; 2715 struct spdk_nvme_ns *new_ns; 2716 2717 if (spdk_nvme_ctrlr_get_num_ns(new_ctrlr) != nvme_ctrlr->num_ns) { 2718 return -EINVAL; 2719 } 2720 2721 nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); 2722 while (nvme_ns != NULL) { 2723 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id); 2724 assert(new_ns != NULL); 2725 2726 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) { 2727 return -EINVAL; 2728 } 2729 2730 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns); 2731 } 2732 2733 return 0; 2734 } 2735 2736 static int 2737 _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr, 2738 struct spdk_nvme_transport_id *trid) 2739 { 2740 struct nvme_ctrlr_trid *new_trid, *tmp_trid; 2741 2742 new_trid = calloc(1, sizeof(*new_trid)); 2743 if (new_trid == NULL) { 2744 return -ENOMEM; 2745 } 2746 new_trid->trid = *trid; 2747 new_trid->is_failed = false; 2748 2749 TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) { 2750 if (tmp_trid->is_failed) { 2751 TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link); 2752 return 0; 2753 } 2754 } 2755 2756 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link); 2757 return 0; 2758 } 2759 2760 /* This is the case that a secondary path is added to an existing 2761 * nvme_ctrlr for failover. After checking if it can access the same 2762 * namespaces as the primary path, it is disconnected until failover occurs. 
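 * In either case the new controller handle is detached below; on success only its
 * transport ID is kept so the path can be connected at failover time.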
2763 */ 2764 static int 2765 bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr, 2766 struct spdk_nvme_ctrlr *new_ctrlr, 2767 struct spdk_nvme_transport_id *trid) 2768 { 2769 int rc; 2770 2771 assert(nvme_ctrlr != NULL); 2772 2773 pthread_mutex_lock(&nvme_ctrlr->mutex); 2774 2775 rc = bdev_nvme_compare_trids(nvme_ctrlr, new_ctrlr, trid); 2776 if (rc != 0) { 2777 goto exit; 2778 } 2779 2780 rc = bdev_nvme_compare_namespaces(nvme_ctrlr, new_ctrlr); 2781 if (rc != 0) { 2782 goto exit; 2783 } 2784 2785 rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid); 2786 2787 exit: 2788 pthread_mutex_unlock(&nvme_ctrlr->mutex); 2789 2790 spdk_nvme_detach(new_ctrlr); 2791 2792 return rc; 2793 } 2794 2795 static void 2796 connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 2797 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 2798 { 2799 struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx; 2800 struct nvme_async_probe_ctx *ctx; 2801 int rc; 2802 2803 ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); 2804 ctx->ctrlr_attached = true; 2805 2806 rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags, ctx); 2807 if (rc != 0) { 2808 populate_namespaces_cb(ctx, 0, rc); 2809 } 2810 } 2811 2812 static void 2813 connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 2814 struct spdk_nvme_ctrlr *ctrlr, 2815 const struct spdk_nvme_ctrlr_opts *opts) 2816 { 2817 struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx; 2818 struct nvme_ctrlr *nvme_ctrlr; 2819 struct nvme_async_probe_ctx *ctx; 2820 int rc; 2821 2822 ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); 2823 ctx->ctrlr_attached = true; 2824 2825 nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name); 2826 if (nvme_ctrlr) { 2827 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid); 2828 } else { 2829 rc = -ENODEV; 2830 } 2831 2832 populate_namespaces_cb(ctx, 0, rc); 2833 } 2834 2835 static int 2836 bdev_nvme_async_poll(void *arg) 2837 { 2838 struct nvme_async_probe_ctx *ctx = arg; 2839 int rc; 2840 2841 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx); 2842 if (spdk_unlikely(rc != -EAGAIN)) { 2843 ctx->probe_done = true; 2844 spdk_poller_unregister(&ctx->poller); 2845 if (!ctx->ctrlr_attached) { 2846 /* The probe is done, but no controller was attached. 2847 * That means we had a failure, so report -EIO back to 2848 * the caller (usually the RPC). populate_namespaces_cb() 2849 * will take care of freeing the nvme_async_probe_ctx. 2850 */ 2851 populate_namespaces_cb(ctx, 0, -EIO); 2852 } else if (ctx->namespaces_populated) { 2853 /* The namespaces for the attached controller were all 2854 * populated and the response was already sent to the 2855 * caller (usually the RPC). So free the context here. 2856 */ 2857 free(ctx); 2858 } 2859 } 2860 2861 return SPDK_POLLER_BUSY; 2862 } 2863 2864 int 2865 bdev_nvme_create(struct spdk_nvme_transport_id *trid, 2866 const char *base_name, 2867 const char **names, 2868 uint32_t count, 2869 uint32_t prchk_flags, 2870 spdk_bdev_create_nvme_fn cb_fn, 2871 void *cb_ctx, 2872 struct spdk_nvme_ctrlr_opts *opts) 2873 { 2874 struct nvme_probe_skip_entry *entry, *tmp; 2875 struct nvme_async_probe_ctx *ctx; 2876 spdk_nvme_attach_cb attach_cb; 2877 2878 /* TODO expand this check to include both the host and target TRIDs. 2879 * Only if both are the same should we fail. 
2880 */ 2881 if (nvme_ctrlr_get(trid) != NULL) { 2882 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr); 2883 return -EEXIST; 2884 } 2885 2886 ctx = calloc(1, sizeof(*ctx)); 2887 if (!ctx) { 2888 return -ENOMEM; 2889 } 2890 ctx->base_name = base_name; 2891 ctx->names = names; 2892 ctx->count = count; 2893 ctx->cb_fn = cb_fn; 2894 ctx->cb_ctx = cb_ctx; 2895 ctx->prchk_flags = prchk_flags; 2896 ctx->trid = *trid; 2897 2898 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 2899 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) { 2900 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 2901 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 2902 free(entry); 2903 break; 2904 } 2905 } 2906 } 2907 2908 if (opts) { 2909 memcpy(&ctx->opts, opts, sizeof(*opts)); 2910 } else { 2911 spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts)); 2912 } 2913 2914 ctx->opts.transport_retry_count = g_opts.retry_count; 2915 ctx->opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms; 2916 ctx->opts.disable_read_ana_log_page = true; 2917 2918 if (nvme_ctrlr_get_by_name(base_name) == NULL) { 2919 attach_cb = connect_attach_cb; 2920 } else { 2921 attach_cb = connect_set_failover_cb; 2922 } 2923 2924 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, attach_cb); 2925 if (ctx->probe_ctx == NULL) { 2926 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr); 2927 free(ctx); 2928 return -ENODEV; 2929 } 2930 ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000); 2931 2932 return 0; 2933 } 2934 2935 static int 2936 bdev_nvme_delete_secondary_trid(struct nvme_ctrlr *nvme_ctrlr, 2937 const struct spdk_nvme_transport_id *trid) 2938 { 2939 struct nvme_ctrlr_trid *ctrlr_trid, *tmp_trid; 2940 2941 if (!spdk_nvme_transport_id_compare(trid, nvme_ctrlr->connected_trid)) { 2942 return -EBUSY; 2943 } 2944 2945 TAILQ_FOREACH_SAFE(ctrlr_trid, &nvme_ctrlr->trids, link, tmp_trid) { 2946 if (!spdk_nvme_transport_id_compare(&ctrlr_trid->trid, trid)) { 2947 TAILQ_REMOVE(&nvme_ctrlr->trids, ctrlr_trid, link); 2948 free(ctrlr_trid); 2949 return 0; 2950 } 2951 } 2952 2953 return -ENXIO; 2954 } 2955 2956 int 2957 bdev_nvme_delete(const char *name, const struct spdk_nvme_transport_id *trid) 2958 { 2959 struct nvme_ctrlr *nvme_ctrlr; 2960 struct nvme_ctrlr_trid *ctrlr_trid; 2961 2962 if (name == NULL) { 2963 return -EINVAL; 2964 } 2965 2966 nvme_ctrlr = nvme_ctrlr_get_by_name(name); 2967 if (nvme_ctrlr == NULL) { 2968 SPDK_ERRLOG("Failed to find NVMe controller\n"); 2969 return -ENODEV; 2970 } 2971 2972 /* case 1: remove the controller itself. */ 2973 if (trid == NULL) { 2974 return _bdev_nvme_delete(nvme_ctrlr, false); 2975 } 2976 2977 /* case 2: we are currently using the path to be removed. */ 2978 if (!spdk_nvme_transport_id_compare(trid, nvme_ctrlr->connected_trid)) { 2979 ctrlr_trid = TAILQ_FIRST(&nvme_ctrlr->trids); 2980 assert(nvme_ctrlr->connected_trid == &ctrlr_trid->trid); 2981 /* case 2A: the current path is the only path. */ 2982 if (!TAILQ_NEXT(ctrlr_trid, link)) { 2983 return _bdev_nvme_delete(nvme_ctrlr, false); 2984 } 2985 2986 /* case 2B: there is an alternative path. */ 2987 return bdev_nvme_failover(nvme_ctrlr, true); 2988 } 2989 2990 /* case 3: We are not using the specified path. 
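 * Just remove it from the list of registered failover paths.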
*/ 2991 return bdev_nvme_delete_secondary_trid(nvme_ctrlr, trid); 2992 } 2993 2994 static int 2995 bdev_nvme_library_init(void) 2996 { 2997 g_bdev_nvme_init_thread = spdk_get_thread(); 2998 2999 spdk_io_device_register(&g_nvme_ctrlrs, bdev_nvme_create_poll_group_cb, 3000 bdev_nvme_destroy_poll_group_cb, 3001 sizeof(struct nvme_poll_group), "nvme_poll_groups"); 3002 3003 return 0; 3004 } 3005 3006 static void 3007 bdev_nvme_library_fini(void) 3008 { 3009 struct nvme_ctrlr *nvme_ctrlr, *tmp; 3010 struct nvme_probe_skip_entry *entry, *entry_tmp; 3011 3012 spdk_poller_unregister(&g_hotplug_poller); 3013 free(g_hotplug_probe_ctx); 3014 g_hotplug_probe_ctx = NULL; 3015 3016 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) { 3017 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 3018 free(entry); 3019 } 3020 3021 pthread_mutex_lock(&g_bdev_nvme_mutex); 3022 TAILQ_FOREACH_SAFE(nvme_ctrlr, &g_nvme_ctrlrs, tailq, tmp) { 3023 pthread_mutex_lock(&nvme_ctrlr->mutex); 3024 if (nvme_ctrlr->destruct) { 3025 /* This controller's destruction was already started 3026 * before the application started shutting down 3027 */ 3028 pthread_mutex_unlock(&nvme_ctrlr->mutex); 3029 continue; 3030 } 3031 nvme_ctrlr->destruct = true; 3032 pthread_mutex_unlock(&nvme_ctrlr->mutex); 3033 3034 spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct, 3035 nvme_ctrlr); 3036 } 3037 3038 g_bdev_nvme_module_finish = true; 3039 if (TAILQ_EMPTY(&g_nvme_ctrlrs)) { 3040 pthread_mutex_unlock(&g_bdev_nvme_mutex); 3041 spdk_io_device_unregister(&g_nvme_ctrlrs, NULL); 3042 spdk_bdev_module_fini_done(); 3043 return; 3044 } 3045 3046 pthread_mutex_unlock(&g_bdev_nvme_mutex); 3047 } 3048 3049 static void 3050 bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio) 3051 { 3052 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 3053 struct spdk_bdev *bdev = bdev_io->bdev; 3054 struct spdk_dif_ctx dif_ctx; 3055 struct spdk_dif_error err_blk = {}; 3056 int rc; 3057 3058 rc = spdk_dif_ctx_init(&dif_ctx, 3059 bdev->blocklen, bdev->md_len, bdev->md_interleave, 3060 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 3061 bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0); 3062 if (rc != 0) { 3063 SPDK_ERRLOG("Initialization of DIF context failed\n"); 3064 return; 3065 } 3066 3067 if (bdev->md_interleave) { 3068 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 3069 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 3070 } else { 3071 struct iovec md_iov = { 3072 .iov_base = bdev_io->u.bdev.md_buf, 3073 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 3074 }; 3075 3076 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 3077 &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 3078 } 3079 3080 if (rc != 0) { 3081 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 3082 err_blk.err_type, err_blk.err_offset); 3083 } else { 3084 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n"); 3085 } 3086 } 3087 3088 static void 3089 bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 3090 { 3091 struct nvme_bdev_io *bio = ref; 3092 3093 if (spdk_nvme_cpl_is_success(cpl)) { 3094 /* Run PI verification for read data buffer. 
*/ 3095 bdev_nvme_verify_pi_error(bio); 3096 } 3097 3098 /* Return original completion status */ 3099 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl); 3100 } 3101 3102 static void 3103 bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 3104 { 3105 struct nvme_bdev_io *bio = ref; 3106 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 3107 struct nvme_bdev_channel *nbdev_ch; 3108 struct spdk_nvme_ns *ns; 3109 struct spdk_nvme_qpair *qpair; 3110 int ret; 3111 3112 if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) { 3113 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n", 3114 cpl->status.sct, cpl->status.sc); 3115 3116 /* Save completion status to use after verifying PI error. */ 3117 bio->cpl = *cpl; 3118 3119 nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io)); 3120 3121 if (spdk_likely(bdev_nvme_find_io_path(nbdev_ch, &ns, &qpair))) { 3122 /* Read without PI checking to verify PI error. */ 3123 ret = bdev_nvme_no_pi_readv(ns, 3124 qpair, 3125 bio, 3126 bdev_io->u.bdev.iovs, 3127 bdev_io->u.bdev.iovcnt, 3128 bdev_io->u.bdev.md_buf, 3129 bdev_io->u.bdev.num_blocks, 3130 bdev_io->u.bdev.offset_blocks); 3131 if (ret == 0) { 3132 return; 3133 } 3134 } 3135 } 3136 3137 bdev_nvme_io_complete_nvme_status(bio, cpl); 3138 } 3139 3140 static void 3141 bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 3142 { 3143 struct nvme_bdev_io *bio = ref; 3144 3145 if (spdk_nvme_cpl_is_pi_error(cpl)) { 3146 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n", 3147 cpl->status.sct, cpl->status.sc); 3148 /* Run PI verification for write data buffer if PI error is detected. */ 3149 bdev_nvme_verify_pi_error(bio); 3150 } 3151 3152 bdev_nvme_io_complete_nvme_status(bio, cpl); 3153 } 3154 3155 static void 3156 bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl) 3157 { 3158 struct nvme_bdev_io *bio = ref; 3159 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 3160 3161 /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks. 3162 * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error(). 3163 */ 3164 bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0; 3165 3166 if (spdk_nvme_cpl_is_pi_error(cpl)) { 3167 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n", 3168 cpl->status.sct, cpl->status.sc); 3169 /* Run PI verification for zone append data buffer if PI error is detected. */ 3170 bdev_nvme_verify_pi_error(bio); 3171 } 3172 3173 bdev_nvme_io_complete_nvme_status(bio, cpl); 3174 } 3175 3176 static void 3177 bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl) 3178 { 3179 struct nvme_bdev_io *bio = ref; 3180 3181 if (spdk_nvme_cpl_is_pi_error(cpl)) { 3182 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n", 3183 cpl->status.sct, cpl->status.sc); 3184 /* Run PI verification for compare data buffer if PI error is detected. 
*/ 3185 bdev_nvme_verify_pi_error(bio); 3186 } 3187 3188 bdev_nvme_io_complete_nvme_status(bio, cpl); 3189 } 3190 3191 static void 3192 bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 3193 { 3194 struct nvme_bdev_io *bio = ref; 3195 3196 /* Compare operation completion */ 3197 if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) { 3198 /* Save compare result for write callback */ 3199 bio->cpl = *cpl; 3200 return; 3201 } 3202 3203 /* Write operation completion */ 3204 if (spdk_nvme_cpl_is_error(&bio->cpl)) { 3205 /* If bio->cpl is already an error, it means the compare operation failed. In that case, 3206 * complete the IO with the compare operation's status. 3207 */ 3208 if (!spdk_nvme_cpl_is_error(cpl)) { 3209 SPDK_ERRLOG("Unexpected write success after compare failure.\n"); 3210 } 3211 3212 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl); 3213 } else { 3214 bdev_nvme_io_complete_nvme_status(bio, cpl); 3215 } 3216 } 3217 3218 static void 3219 bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) 3220 { 3221 struct nvme_bdev_io *bio = ref; 3222 3223 bdev_nvme_io_complete_nvme_status(bio, cpl); 3224 } 3225 3226 static int 3227 fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc) 3228 { 3229 switch (desc->zs) { 3230 case SPDK_NVME_ZONE_STATE_EMPTY: 3231 info->state = SPDK_BDEV_ZONE_STATE_EMPTY; 3232 break; 3233 case SPDK_NVME_ZONE_STATE_IOPEN: 3234 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN; 3235 break; 3236 case SPDK_NVME_ZONE_STATE_EOPEN: 3237 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN; 3238 break; 3239 case SPDK_NVME_ZONE_STATE_CLOSED: 3240 info->state = SPDK_BDEV_ZONE_STATE_CLOSED; 3241 break; 3242 case SPDK_NVME_ZONE_STATE_RONLY: 3243 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY; 3244 break; 3245 case SPDK_NVME_ZONE_STATE_FULL: 3246 info->state = SPDK_BDEV_ZONE_STATE_FULL; 3247 break; 3248 case SPDK_NVME_ZONE_STATE_OFFLINE: 3249 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE; 3250 break; 3251 default: 3252 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs); 3253 return -EIO; 3254 } 3255 3256 info->zone_id = desc->zslba; 3257 info->write_pointer = desc->wp; 3258 info->capacity = desc->zcap; 3259 3260 return 0; 3261 } 3262 3263 static void 3264 bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl) 3265 { 3266 struct nvme_bdev_io *bio = ref; 3267 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 3268 struct spdk_io_channel *ch = spdk_bdev_io_get_io_channel(bdev_io); 3269 struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch); 3270 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 3271 uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones; 3272 struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf; 3273 uint64_t max_zones_per_buf, i; 3274 uint32_t zone_report_bufsize; 3275 struct spdk_nvme_ns *ns; 3276 struct spdk_nvme_qpair *qpair; 3277 int ret; 3278 3279 if (spdk_nvme_cpl_is_error(cpl)) { 3280 goto out_complete_io_nvme_cpl; 3281 } 3282 3283 if (!bdev_nvme_find_io_path(nbdev_ch, &ns, &qpair)) { 3284 ret = -ENXIO; 3285 goto out_complete_io_ret; 3286 } 3287 3288 zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns); 3289 max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) / 3290 sizeof(bio->zone_report_buf->descs[0]); 3291 3292 if (bio->zone_report_buf->nr_zones > max_zones_per_buf) { 3293 ret = -EINVAL; 3294 goto out_complete_io_ret; 3295 } 3296 3297 if (!bio->zone_report_buf->nr_zones) { 3298 ret = -EINVAL; 3299 goto 
out_complete_io_ret; 3300 } 3301 3302 for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) { 3303 ret = fill_zone_from_report(&info[bio->handled_zones], 3304 &bio->zone_report_buf->descs[i]); 3305 if (ret) { 3306 goto out_complete_io_ret; 3307 } 3308 bio->handled_zones++; 3309 } 3310 3311 if (bio->handled_zones < zones_to_copy) { 3312 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns); 3313 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones); 3314 3315 memset(bio->zone_report_buf, 0, zone_report_bufsize); 3316 ret = spdk_nvme_zns_report_zones(ns, qpair, 3317 bio->zone_report_buf, zone_report_bufsize, 3318 slba, SPDK_NVME_ZRA_LIST_ALL, true, 3319 bdev_nvme_get_zone_info_done, bio); 3320 if (!ret) { 3321 return; 3322 } else { 3323 goto out_complete_io_ret; 3324 } 3325 } 3326 3327 out_complete_io_nvme_cpl: 3328 free(bio->zone_report_buf); 3329 bio->zone_report_buf = NULL; 3330 bdev_nvme_io_complete_nvme_status(bio, cpl); 3331 return; 3332 3333 out_complete_io_ret: 3334 free(bio->zone_report_buf); 3335 bio->zone_report_buf = NULL; 3336 bdev_nvme_io_complete(bio, ret); 3337 } 3338 3339 static void 3340 bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl) 3341 { 3342 struct nvme_bdev_io *bio = ref; 3343 3344 bdev_nvme_io_complete_nvme_status(bio, cpl); 3345 } 3346 3347 static void 3348 bdev_nvme_admin_passthru_completion(void *ctx) 3349 { 3350 struct nvme_bdev_io *bio = ctx; 3351 3352 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl); 3353 } 3354 3355 static void 3356 bdev_nvme_abort_completion(void *ctx) 3357 { 3358 struct nvme_bdev_io *bio = ctx; 3359 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 3360 3361 if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) { 3362 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 3363 } else { 3364 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 3365 } 3366 } 3367 3368 static void 3369 bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl) 3370 { 3371 struct nvme_bdev_io *bio = ref; 3372 3373 bio->cpl = *cpl; 3374 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio); 3375 } 3376 3377 static void 3378 bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) 3379 { 3380 struct nvme_bdev_io *bio = ref; 3381 3382 bio->cpl = *cpl; 3383 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); 3384 } 3385 3386 static void 3387 bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) 3388 { 3389 struct nvme_bdev_io *bio = ref; 3390 struct iovec *iov; 3391 3392 bio->iov_offset = sgl_offset; 3393 for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { 3394 iov = &bio->iovs[bio->iovpos]; 3395 if (bio->iov_offset < iov->iov_len) { 3396 break; 3397 } 3398 3399 bio->iov_offset -= iov->iov_len; 3400 } 3401 } 3402 3403 static int 3404 bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) 3405 { 3406 struct nvme_bdev_io *bio = ref; 3407 struct iovec *iov; 3408 3409 assert(bio->iovpos < bio->iovcnt); 3410 3411 iov = &bio->iovs[bio->iovpos]; 3412 3413 *address = iov->iov_base; 3414 *length = iov->iov_len; 3415 3416 if (bio->iov_offset) { 3417 assert(bio->iov_offset <= iov->iov_len); 3418 *address += bio->iov_offset; 3419 *length -= bio->iov_offset; 3420 } 3421 3422 bio->iov_offset += *length; 3423 if (bio->iov_offset == iov->iov_len) { 3424 bio->iovpos++; 3425 bio->iov_offset = 0; 3426 } 3427 3428 return 0; 3429 } 3430 3431 static void 3432 
bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset) 3433 { 3434 struct nvme_bdev_io *bio = ref; 3435 struct iovec *iov; 3436 3437 bio->fused_iov_offset = sgl_offset; 3438 for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) { 3439 iov = &bio->fused_iovs[bio->fused_iovpos]; 3440 if (bio->fused_iov_offset < iov->iov_len) { 3441 break; 3442 } 3443 3444 bio->fused_iov_offset -= iov->iov_len; 3445 } 3446 } 3447 3448 static int 3449 bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length) 3450 { 3451 struct nvme_bdev_io *bio = ref; 3452 struct iovec *iov; 3453 3454 assert(bio->fused_iovpos < bio->fused_iovcnt); 3455 3456 iov = &bio->fused_iovs[bio->fused_iovpos]; 3457 3458 *address = iov->iov_base; 3459 *length = iov->iov_len; 3460 3461 if (bio->fused_iov_offset) { 3462 assert(bio->fused_iov_offset <= iov->iov_len); 3463 *address += bio->fused_iov_offset; 3464 *length -= bio->fused_iov_offset; 3465 } 3466 3467 bio->fused_iov_offset += *length; 3468 if (bio->fused_iov_offset == iov->iov_len) { 3469 bio->fused_iovpos++; 3470 bio->fused_iov_offset = 0; 3471 } 3472 3473 return 0; 3474 } 3475 3476 static int 3477 bdev_nvme_no_pi_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3478 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 3479 void *md, uint64_t lba_count, uint64_t lba) 3480 { 3481 int rc; 3482 3483 SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n", 3484 lba_count, lba); 3485 3486 bio->iovs = iov; 3487 bio->iovcnt = iovcnt; 3488 bio->iovpos = 0; 3489 bio->iov_offset = 0; 3490 3491 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count, 3492 bdev_nvme_no_pi_readv_done, bio, 0, 3493 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 3494 md, 0, 0); 3495 3496 if (rc != 0 && rc != -ENOMEM) { 3497 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc); 3498 } 3499 return rc; 3500 } 3501 3502 static int 3503 bdev_nvme_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3504 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 3505 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags, 3506 struct spdk_bdev_ext_io_opts *ext_opts) 3507 { 3508 int rc; 3509 3510 SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n", 3511 lba_count, lba); 3512 3513 bio->iovs = iov; 3514 bio->iovcnt = iovcnt; 3515 bio->iovpos = 0; 3516 bio->iov_offset = 0; 3517 3518 if (ext_opts) { 3519 bio->ext_opts.size = sizeof(struct spdk_nvme_ns_cmd_ext_io_opts); 3520 bio->ext_opts.memory_domain = ext_opts->memory_domain; 3521 bio->ext_opts.memory_domain_ctx = ext_opts->memory_domain_ctx; 3522 bio->ext_opts.io_flags = flags; 3523 bio->ext_opts.metadata = md; 3524 3525 rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count, 3526 bdev_nvme_readv_done, bio, 3527 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 3528 &bio->ext_opts); 3529 } else if (iovcnt == 1) { 3530 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base, md, lba, 3531 lba_count, 3532 bdev_nvme_readv_done, bio, 3533 flags, 3534 0, 0); 3535 } else { 3536 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count, 3537 bdev_nvme_readv_done, bio, flags, 3538 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 3539 md, 0, 0); 3540 } 3541 3542 if (rc != 0 && rc != -ENOMEM) { 3543 SPDK_ERRLOG("readv failed: rc = %d\n", rc); 3544 } 3545 return rc; 3546 } 3547 3548 static int 3549 bdev_nvme_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3550 struct nvme_bdev_io 
*bio,
                 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
                 uint32_t flags, struct spdk_bdev_ext_io_opts *ext_opts)
{
    int rc;

    SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
                  lba_count, lba);

    bio->iovs = iov;
    bio->iovcnt = iovcnt;
    bio->iovpos = 0;
    bio->iov_offset = 0;

    if (ext_opts) {
        bio->ext_opts.size = sizeof(struct spdk_nvme_ns_cmd_ext_io_opts);
        bio->ext_opts.memory_domain = ext_opts->memory_domain;
        bio->ext_opts.memory_domain_ctx = ext_opts->memory_domain_ctx;
        bio->ext_opts.io_flags = flags;
        bio->ext_opts.metadata = md;

        rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
                                         bdev_nvme_writev_done, bio,
                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
                                         &bio->ext_opts);
    } else if (iovcnt == 1) {
        rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base, md, lba,
                                            lba_count,
                                            bdev_nvme_writev_done, bio,
                                            flags,
                                            0, 0);
    } else {
        rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
                                             bdev_nvme_writev_done, bio, flags,
                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
                                             md, 0, 0);
    }

    if (rc != 0 && rc != -ENOMEM) {
        SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    }
    return rc;
}

static int
bdev_nvme_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                       struct nvme_bdev_io *bio,
                       struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t zslba,
                       uint32_t flags)
{
    int rc;

    SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
                  lba_count, zslba);

    bio->iovs = iov;
    bio->iovcnt = iovcnt;
    bio->iovpos = 0;
    bio->iov_offset = 0;

    if (iovcnt == 1) {
        rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
                                               lba_count,
                                               bdev_nvme_zone_appendv_done, bio,
                                               flags,
                                               0, 0);
    } else {
        rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
                                                bdev_nvme_zone_appendv_done, bio, flags,
                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
                                                md, 0, 0);
    }

    if (rc != 0 && rc != -ENOMEM) {
        SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    }
    return rc;
}

static int
bdev_nvme_comparev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                   struct nvme_bdev_io *bio,
                   struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba,
                   uint32_t flags)
{
    int rc;

    SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
                  lba_count, lba);

    bio->iovs = iov;
    bio->iovcnt = iovcnt;
    bio->iovpos = 0;
    bio->iov_offset = 0;

    rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
                                           bdev_nvme_comparev_done, bio, flags,
                                           bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
                                           md, 0, 0);

    if (rc != 0 && rc != -ENOMEM) {
        SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    }
    return rc;
}

static int
bdev_nvme_comparev_and_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                              struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
                              struct iovec *write_iov, int write_iovcnt,
                              void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
{
    struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    int
rc; 3664 3665 SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n", 3666 lba_count, lba); 3667 3668 bio->iovs = cmp_iov; 3669 bio->iovcnt = cmp_iovcnt; 3670 bio->iovpos = 0; 3671 bio->iov_offset = 0; 3672 bio->fused_iovs = write_iov; 3673 bio->fused_iovcnt = write_iovcnt; 3674 bio->fused_iovpos = 0; 3675 bio->fused_iov_offset = 0; 3676 3677 if (bdev_io->num_retries == 0) { 3678 bio->first_fused_submitted = false; 3679 } 3680 3681 if (!bio->first_fused_submitted) { 3682 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST; 3683 memset(&bio->cpl, 0, sizeof(bio->cpl)); 3684 3685 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count, 3686 bdev_nvme_comparev_and_writev_done, bio, flags, 3687 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0); 3688 if (rc == 0) { 3689 bio->first_fused_submitted = true; 3690 flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST; 3691 } else { 3692 if (rc != -ENOMEM) { 3693 SPDK_ERRLOG("compare failed: rc = %d\n", rc); 3694 } 3695 return rc; 3696 } 3697 } 3698 3699 flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND; 3700 3701 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count, 3702 bdev_nvme_comparev_and_writev_done, bio, flags, 3703 bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0); 3704 if (rc != 0 && rc != -ENOMEM) { 3705 SPDK_ERRLOG("write failed: rc = %d\n", rc); 3706 rc = 0; 3707 } 3708 3709 return rc; 3710 } 3711 3712 static int 3713 bdev_nvme_unmap(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3714 struct nvme_bdev_io *bio, 3715 uint64_t offset_blocks, 3716 uint64_t num_blocks) 3717 { 3718 struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; 3719 struct spdk_nvme_dsm_range *range; 3720 uint64_t offset, remaining; 3721 uint64_t num_ranges_u64; 3722 uint16_t num_ranges; 3723 int rc; 3724 3725 num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / 3726 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 3727 if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { 3728 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); 3729 return -EINVAL; 3730 } 3731 num_ranges = (uint16_t)num_ranges_u64; 3732 3733 offset = offset_blocks; 3734 remaining = num_blocks; 3735 range = &dsm_ranges[0]; 3736 3737 /* Fill max-size ranges until the remaining blocks fit into one range */ 3738 while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { 3739 range->attributes.raw = 0; 3740 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 3741 range->starting_lba = offset; 3742 3743 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 3744 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 3745 range++; 3746 } 3747 3748 /* Final range describes the remaining blocks */ 3749 range->attributes.raw = 0; 3750 range->length = remaining; 3751 range->starting_lba = offset; 3752 3753 rc = spdk_nvme_ns_cmd_dataset_management(ns, qpair, 3754 SPDK_NVME_DSM_ATTR_DEALLOCATE, 3755 dsm_ranges, num_ranges, 3756 bdev_nvme_queued_done, bio); 3757 3758 return rc; 3759 } 3760 3761 static int 3762 bdev_nvme_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3763 struct nvme_bdev_io *bio, 3764 uint64_t offset_blocks, 3765 uint64_t num_blocks) 3766 { 3767 if (num_blocks > UINT16_MAX + 1) { 3768 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n"); 3769 return -EINVAL; 3770 } 3771 3772 return spdk_nvme_ns_cmd_write_zeroes(ns, qpair, 3773 offset_blocks, num_blocks, 3774 bdev_nvme_queued_done, bio, 3775 
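                                         /* io_flags - none needed for Write Zeroes */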
0); 3776 } 3777 3778 static int 3779 bdev_nvme_get_zone_info(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3780 struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones, 3781 struct spdk_bdev_zone_info *info) 3782 { 3783 uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns); 3784 uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns); 3785 uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns); 3786 3787 if (zone_id % zone_size != 0) { 3788 return -EINVAL; 3789 } 3790 3791 if (num_zones > total_zones || !num_zones) { 3792 return -EINVAL; 3793 } 3794 3795 assert(!bio->zone_report_buf); 3796 bio->zone_report_buf = calloc(1, zone_report_bufsize); 3797 if (!bio->zone_report_buf) { 3798 return -ENOMEM; 3799 } 3800 3801 bio->handled_zones = 0; 3802 3803 return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize, 3804 zone_id, SPDK_NVME_ZRA_LIST_ALL, true, 3805 bdev_nvme_get_zone_info_done, bio); 3806 } 3807 3808 static int 3809 bdev_nvme_zone_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3810 struct nvme_bdev_io *bio, uint64_t zone_id, 3811 enum spdk_bdev_zone_action action) 3812 { 3813 switch (action) { 3814 case SPDK_BDEV_ZONE_CLOSE: 3815 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false, 3816 bdev_nvme_zone_management_done, bio); 3817 case SPDK_BDEV_ZONE_FINISH: 3818 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false, 3819 bdev_nvme_zone_management_done, bio); 3820 case SPDK_BDEV_ZONE_OPEN: 3821 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false, 3822 bdev_nvme_zone_management_done, bio); 3823 case SPDK_BDEV_ZONE_RESET: 3824 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false, 3825 bdev_nvme_zone_management_done, bio); 3826 case SPDK_BDEV_ZONE_OFFLINE: 3827 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false, 3828 bdev_nvme_zone_management_done, bio); 3829 default: 3830 return -EINVAL; 3831 } 3832 } 3833 3834 static int 3835 bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio, 3836 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 3837 { 3838 struct nvme_ctrlr *nvme_ctrlr; 3839 uint32_t max_xfer_size; 3840 3841 if (!bdev_nvme_find_admin_path(nbdev_ch, &nvme_ctrlr)) { 3842 return -EINVAL; 3843 } 3844 3845 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr); 3846 3847 if (nbytes > max_xfer_size) { 3848 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 3849 return -EINVAL; 3850 } 3851 3852 bio->orig_thread = spdk_get_thread(); 3853 3854 return spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, 3855 (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); 3856 } 3857 3858 static int 3859 bdev_nvme_io_passthru(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, 3860 struct nvme_bdev_io *bio, 3861 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 3862 { 3863 uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); 3864 struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); 3865 3866 if (nbytes > max_xfer_size) { 3867 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 3868 return -EINVAL; 3869 } 3870 3871 /* 3872 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 3873 * so fill it out automatically. 
static int
bdev_nvme_io_passthru(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		      struct nvme_bdev_io *bio,
		      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(ns);

	return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
					  (uint32_t)nbytes, bdev_nvme_queued_done, bio);
}

static int
bdev_nvme_io_passthru_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			 struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
{
	size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
	uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
		SPDK_ERRLOG("invalid metadata buffer size\n");
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(ns);

	return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
			(uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
}
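/*
 * Illustrative sketch (hypothetical helper, not used by the driver): the
 * metadata-size check in bdev_nvme_io_passthru_md() above, in isolation.
 * For a namespace hypothetically formatted as 512-byte data + 8-byte metadata
 * (extended sector size 520), an 8-sector passthru of 4160 bytes must be
 * paired with exactly 8 * 8 = 64 bytes of metadata.
 */
static inline bool
bdev_nvme_example_md_len_matches(size_t nbytes, size_t md_len,
				 uint32_t extended_sector_size, uint32_t md_size)
{
	size_t nr_sectors = nbytes / extended_sector_size;

	return md_len == nr_sectors * md_size;
}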
static int
bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
		struct nvme_bdev_io *bio_to_abort)
{
	struct nvme_ctrlr_channel *ctrlr_ch = nbdev_ch->ctrlr_ch;
	int rc;

	bio->orig_thread = spdk_get_thread();

	rc = spdk_nvme_ctrlr_cmd_abort_ext(ctrlr_ch->ctrlr->ctrlr,
					   ctrlr_ch->qpair,
					   bio_to_abort,
					   bdev_nvme_abort_done, bio);
	if (rc == -ENOENT) {
		/* If no command was found in the I/O qpair, the target command may be
		 * an admin command.
		 */
		rc = spdk_nvme_ctrlr_cmd_abort_ext(ctrlr_ch->ctrlr->ctrlr,
						   NULL,
						   bio_to_abort,
						   bdev_nvme_abort_done, bio);
	}

	if (rc == -ENOENT) {
		/* If no command was found at all, complete the abort request with failure.
		 * Setting bit 0 of cdw0 indicates that the command was not aborted.
		 */
		bio->cpl.cdw0 |= 1U;
		bio->cpl.status.sc = SPDK_NVME_SC_SUCCESS;
		bio->cpl.status.sct = SPDK_NVME_SCT_GENERIC;

		bdev_nvme_abort_completion(bio);

		rc = 0;
	}

	return rc;
}

static void
bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
{
	const char *action;

	if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
		action = "reset";
	} else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
		action = "abort";
	} else {
		action = "none";
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "action_on_timeout", action);
	spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
	spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
	spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
	spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count);
	spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
	spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
	spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
	spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
	spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
	spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
	spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
	spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static void
nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
		       struct nvme_ctrlr *nvme_ctrlr)
{
	struct spdk_nvme_transport_id *trid;

	trid = nvme_ctrlr->connected_trid;

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", nvme_ctrlr->name);
	nvme_bdev_dump_trid_json(trid, w);
	spdk_json_write_named_bool(w, "prchk_reftag",
				   (nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
	spdk_json_write_named_bool(w, "prchk_guard",
				   (nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static void
bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
	spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
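/*
 * For reference, the three helpers above serialize the subsystem state as a
 * sequence of RPC calls, e.g. (shape only, names and values are illustrative):
 *
 *   {"method": "bdev_nvme_set_options", "params": {"action_on_timeout": "none", ...}}
 *   {"method": "bdev_nvme_attach_controller", "params": {"name": "Nvme0", ...}}
 *   {"method": "bdev_nvme_set_hotplug", "params": {"period_us": 100000, "enable": false}}
 */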
static int
bdev_nvme_config_json(struct spdk_json_write_ctx *w)
{
	struct nvme_ctrlr *nvme_ctrlr;

	bdev_nvme_opts_config_json(w);

	pthread_mutex_lock(&g_bdev_nvme_mutex);

	TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
		nvme_ctrlr_config_json(w, nvme_ctrlr);
	}

	/* Dump this last, to give all NVMe bdevs a chance to be constructed
	 * before enabling the hotplug poller.
	 */
	bdev_nvme_hotplug_config_json(w);

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	return 0;
}

struct spdk_nvme_ctrlr *
bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
{
	if (!bdev || bdev->module != &nvme_if) {
		return NULL;
	}

	return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ns->ctrlr->ctrlr;
}

SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
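/*
 * Illustrative usage sketch (hypothetical caller, not part of the driver):
 * bdev_nvme_get_ctrlr() above returns NULL for bdevs that were not created by
 * this module, so it can double as a cheap "is this an NVMe bdev?" test.
 */
static inline bool
bdev_nvme_example_bdev_is_nvme(struct spdk_bdev *bdev)
{
	return bdev_nvme_get_ctrlr(bdev) != NULL;
}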