1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_nvme.h" 37 #include "bdev_ocssd.h" 38 39 #include "spdk/config.h" 40 #include "spdk/conf.h" 41 #include "spdk/endian.h" 42 #include "spdk/bdev.h" 43 #include "spdk/json.h" 44 #include "spdk/nvme.h" 45 #include "spdk/nvme_ocssd.h" 46 #include "spdk/thread.h" 47 #include "spdk/string.h" 48 #include "spdk/likely.h" 49 #include "spdk/util.h" 50 51 #include "spdk/bdev_module.h" 52 #include "spdk_internal/log.h" 53 54 #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true 55 56 static void bdev_nvme_get_spdk_running_config(FILE *fp); 57 static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); 58 59 struct nvme_bdev_io { 60 /** array of iovecs to transfer. */ 61 struct iovec *iovs; 62 63 /** Number of iovecs in iovs array. */ 64 int iovcnt; 65 66 /** Current iovec position. */ 67 int iovpos; 68 69 /** Offset in current iovec. */ 70 uint32_t iov_offset; 71 72 /** array of iovecs to transfer. */ 73 struct iovec *fused_iovs; 74 75 /** Number of iovecs in iovs array. */ 76 int fused_iovcnt; 77 78 /** Current iovec position. */ 79 int fused_iovpos; 80 81 /** Offset in current iovec. */ 82 uint32_t fused_iov_offset; 83 84 /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */ 85 struct spdk_nvme_cpl cpl; 86 87 /** Originating thread */ 88 struct spdk_thread *orig_thread; 89 90 /** Keeps track if first of fused commands was submitted */ 91 bool first_fused_submitted; 92 }; 93 94 struct nvme_probe_ctx { 95 size_t count; 96 struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; 97 struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS]; 98 const char *names[NVME_MAX_CONTROLLERS]; 99 uint32_t prchk_flags[NVME_MAX_CONTROLLERS]; 100 const char *hostnqn; 101 }; 102 103 struct nvme_probe_skip_entry { 104 struct spdk_nvme_transport_id trid; 105 TAILQ_ENTRY(nvme_probe_skip_entry) tailq; 106 }; 107 /* All the controllers deleted by users via RPC are skipped by hotplug monitor */ 108 static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER( 109 g_skipped_nvme_ctrlrs); 110 111 static struct spdk_bdev_nvme_opts g_opts = { 112 .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE, 113 .timeout_us = 0, 114 .retry_count = 4, 115 .arbitration_burst = 0, 116 .low_priority_weight = 0, 117 .medium_priority_weight = 0, 118 .high_priority_weight = 0, 119 .nvme_adminq_poll_period_us = 10000ULL, 120 .nvme_ioq_poll_period_us = 0, 121 .io_queue_requests = 0, 122 .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT, 123 }; 124 125 #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL 126 #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL 127 128 static int g_hot_insert_nvme_controller_index = 0; 129 static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT; 130 static bool g_nvme_hotplug_enabled = false; 131 static struct spdk_thread *g_bdev_nvme_init_thread; 132 static struct spdk_poller *g_hotplug_poller; 133 static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; 134 static char *g_nvme_hostnqn = NULL; 135 136 static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 137 struct nvme_async_probe_ctx *ctx); 138 static void nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx); 139 static int bdev_nvme_library_init(void); 140 static void bdev_nvme_library_fini(void); 141 static int bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 142 struct nvme_bdev_io *bio, 143 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 144 static int bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 145 struct nvme_bdev_io *bio, 146 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 147 static int bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 148 struct nvme_bdev_io *bio, 149 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 150 static int bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 151 struct nvme_bdev_io *bio, 152 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 153 static int bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 154 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 155 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba); 156 static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 157 struct nvme_bdev_io *bio, 158 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 159 static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 160 struct nvme_bdev_io *bio, 161 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 162 static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 163 struct nvme_bdev_io *bio, 164 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len); 165 static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio); 166 167 typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 168 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 169 static void nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 170 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 171 172 static populate_namespace_fn g_populate_namespace_fn[] = { 173 NULL, 174 nvme_ctrlr_populate_standard_namespace, 175 bdev_ocssd_populate_namespace, 176 }; 177 178 typedef void (*depopulate_namespace_fn)(struct nvme_bdev_ns *ns); 179 static void nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns); 180 181 static depopulate_namespace_fn g_depopulate_namespace_fn[] = { 182 NULL, 183 nvme_ctrlr_depopulate_standard_namespace, 184 bdev_ocssd_depopulate_namespace, 185 }; 186 187 typedef void (*config_json_namespace_fn)(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns); 188 static void nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, 189 struct nvme_bdev_ns *ns); 190 191 static config_json_namespace_fn g_config_json_namespace_fn[] = { 192 NULL, 193 nvme_ctrlr_config_json_standard_namespace, 194 bdev_ocssd_namespace_config_json, 195 }; 196 197 struct spdk_nvme_qpair * 198 bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) 199 { 200 struct nvme_io_channel *nvme_ch; 201 202 nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch); 203 204 return nvme_ch->qpair; 205 } 206 207 static int 208 bdev_nvme_get_ctx_size(void) 209 { 210 return sizeof(struct nvme_bdev_io); 211 } 212 213 static struct spdk_bdev_module nvme_if = { 214 .name = "nvme", 215 .async_fini = true, 216 .module_init = bdev_nvme_library_init, 217 .module_fini = bdev_nvme_library_fini, 218 .config_text = bdev_nvme_get_spdk_running_config, 219 .config_json = bdev_nvme_config_json, 220 .get_ctx_size = bdev_nvme_get_ctx_size, 221 222 }; 223 SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if) 224 225 static void 226 bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 227 { 228 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "qpar %p is disconnected, attempting reconnect.\n", qpair); 229 /* 230 * Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will 231 * reconnect a qpair and we will stop getting a callback for this one. 232 */ 233 spdk_nvme_ctrlr_reconnect_io_qpair(qpair); 234 } 235 236 static int 237 bdev_nvme_poll(void *arg) 238 { 239 struct nvme_bdev_poll_group *group = arg; 240 int64_t num_completions; 241 242 if (group->collect_spin_stat && group->start_ticks == 0) { 243 group->start_ticks = spdk_get_ticks(); 244 } 245 246 num_completions = spdk_nvme_poll_group_process_completions(group->group, 0, 247 bdev_nvme_disconnected_qpair_cb); 248 if (group->collect_spin_stat) { 249 if (num_completions > 0) { 250 if (group->end_ticks != 0) { 251 group->spin_ticks += (group->end_ticks - group->start_ticks); 252 group->end_ticks = 0; 253 } 254 group->start_ticks = 0; 255 } else { 256 group->end_ticks = spdk_get_ticks(); 257 } 258 } 259 260 return num_completions; 261 } 262 263 static int 264 bdev_nvme_poll_adminq(void *arg) 265 { 266 int32_t rc; 267 struct spdk_nvme_ctrlr *ctrlr = arg; 268 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 269 270 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 271 272 if (rc < 0) { 273 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 274 assert(nvme_bdev_ctrlr != NULL); 275 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 276 } 277 278 return rc; 279 } 280 281 static int 282 bdev_nvme_destruct(void *ctx) 283 { 284 struct nvme_bdev *nvme_disk = ctx; 285 286 nvme_bdev_detach_bdev_from_ns(nvme_disk); 287 288 free(nvme_disk->disk.name); 289 free(nvme_disk); 290 291 return 0; 292 } 293 294 static int 295 bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio, 296 uint64_t offset, uint64_t nbytes) 297 { 298 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); 299 300 return 0; 301 } 302 303 static void 304 _bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i) 305 { 306 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 307 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 308 struct spdk_bdev_io *bdev_io; 309 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; 310 311 /* A NULL ctx means success. */ 312 if (spdk_io_channel_iter_get_ctx(i) != NULL) { 313 status = SPDK_BDEV_IO_STATUS_FAILED; 314 } 315 316 while (!TAILQ_EMPTY(&nvme_ch->pending_resets)) { 317 bdev_io = TAILQ_FIRST(&nvme_ch->pending_resets); 318 TAILQ_REMOVE(&nvme_ch->pending_resets, bdev_io, module_link); 319 spdk_bdev_io_complete(bdev_io, status); 320 } 321 322 spdk_for_each_channel_continue(i, 0); 323 } 324 325 static void 326 _bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc) 327 { 328 /* we are using the for_each_channel cb_arg like a return code here. */ 329 /* If it's zero, we succeeded, otherwise, the reset failed. */ 330 void *cb_arg = NULL; 331 332 if (rc) { 333 cb_arg = (void *)0x1; 334 SPDK_ERRLOG("Resetting controller failed.\n"); 335 } else { 336 SPDK_NOTICELOG("Resetting controller successful.\n"); 337 } 338 339 pthread_mutex_lock(&g_bdev_nvme_mutex); 340 nvme_bdev_ctrlr->resetting = false; 341 pthread_mutex_unlock(&g_bdev_nvme_mutex); 342 /* Make sure we clear any pending resets before returning. */ 343 spdk_for_each_channel(nvme_bdev_ctrlr, 344 _bdev_nvme_complete_pending_resets, 345 cb_arg, NULL); 346 } 347 348 static void 349 _bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status) 350 { 351 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 352 void *ctx = spdk_io_channel_iter_get_ctx(i); 353 int rc = SPDK_BDEV_IO_STATUS_SUCCESS; 354 355 if (status) { 356 rc = SPDK_BDEV_IO_STATUS_FAILED; 357 } 358 if (ctx) { 359 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc); 360 } 361 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 362 } 363 364 static void 365 _bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i) 366 { 367 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 368 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 369 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 370 struct spdk_nvme_io_qpair_opts opts; 371 372 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 373 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 374 opts.create_only = true; 375 376 nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 377 if (!nvme_ch->qpair) { 378 spdk_for_each_channel_continue(i, -1); 379 return; 380 } 381 382 assert(nvme_ch->group != NULL); 383 if (spdk_nvme_poll_group_add(nvme_ch->group->group, nvme_ch->qpair) != 0) { 384 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n"); 385 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 386 spdk_for_each_channel_continue(i, -1); 387 return; 388 } 389 390 if (spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair)) { 391 SPDK_ERRLOG("Unable to connect I/O qpair.\n"); 392 spdk_nvme_poll_group_remove(nvme_ch->group->group, nvme_ch->qpair); 393 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 394 spdk_for_each_channel_continue(i, -1); 395 return; 396 } 397 398 spdk_for_each_channel_continue(i, 0); 399 } 400 401 static void 402 _bdev_nvme_reset(struct spdk_io_channel_iter *i, int status) 403 { 404 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 405 struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i); 406 int rc; 407 408 if (status) { 409 if (bio) { 410 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 411 } 412 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 413 return; 414 } 415 416 rc = spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr); 417 if (rc != 0) { 418 if (bio) { 419 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 420 } 421 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, rc); 422 return; 423 } 424 425 /* Recreate all of the I/O queue pairs */ 426 spdk_for_each_channel(nvme_bdev_ctrlr, 427 _bdev_nvme_reset_create_qpair, 428 bio, 429 _bdev_nvme_reset_create_qpairs_done); 430 431 432 } 433 434 static void 435 _bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) 436 { 437 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 438 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 439 int rc; 440 441 rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 442 if (!rc) { 443 nvme_ch->qpair = NULL; 444 } 445 446 spdk_for_each_channel_continue(i, rc); 447 } 448 449 static int 450 bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio) 451 { 452 struct spdk_io_channel *ch; 453 struct nvme_io_channel *nvme_ch; 454 455 pthread_mutex_lock(&g_bdev_nvme_mutex); 456 if (nvme_bdev_ctrlr->destruct) { 457 /* Don't bother resetting if the controller is in the process of being destructed. */ 458 if (bio) { 459 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 460 } 461 pthread_mutex_unlock(&g_bdev_nvme_mutex); 462 return 0; 463 } 464 465 if (!nvme_bdev_ctrlr->resetting) { 466 nvme_bdev_ctrlr->resetting = true; 467 } else { 468 pthread_mutex_unlock(&g_bdev_nvme_mutex); 469 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n"); 470 /* 471 * The internal reset calls won't be queued. This is on purpose so that we don't 472 * interfere with the app framework reset strategy. i.e. we are deferring to the 473 * upper level. If they are in the middle of a reset, we won't try to schedule another one. 474 */ 475 if (bio) { 476 ch = spdk_get_io_channel(nvme_bdev_ctrlr); 477 assert(ch != NULL); 478 nvme_ch = spdk_io_channel_get_ctx(ch); 479 TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link); 480 spdk_put_io_channel(ch); 481 } 482 return 0; 483 } 484 485 pthread_mutex_unlock(&g_bdev_nvme_mutex); 486 /* First, delete all NVMe I/O queue pairs. */ 487 spdk_for_each_channel(nvme_bdev_ctrlr, 488 _bdev_nvme_reset_destroy_qpair, 489 bio, 490 _bdev_nvme_reset); 491 492 return 0; 493 } 494 495 static int 496 bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 497 struct nvme_bdev_io *bio, 498 uint64_t offset_blocks, 499 uint64_t num_blocks); 500 501 static void 502 bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 503 bool success) 504 { 505 int ret; 506 507 if (!success) { 508 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 509 return; 510 } 511 512 ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, 513 ch, 514 (struct nvme_bdev_io *)bdev_io->driver_ctx, 515 bdev_io->u.bdev.iovs, 516 bdev_io->u.bdev.iovcnt, 517 bdev_io->u.bdev.md_buf, 518 bdev_io->u.bdev.num_blocks, 519 bdev_io->u.bdev.offset_blocks); 520 521 if (spdk_likely(ret == 0)) { 522 return; 523 } else if (ret == -ENOMEM) { 524 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 525 } else { 526 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 527 } 528 } 529 530 static int 531 _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 532 { 533 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 534 struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt; 535 struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx; 536 537 if (nvme_ch->qpair == NULL) { 538 /* The device is currently resetting */ 539 return -1; 540 } 541 542 switch (bdev_io->type) { 543 case SPDK_BDEV_IO_TYPE_READ: 544 spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb, 545 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 546 return 0; 547 548 case SPDK_BDEV_IO_TYPE_WRITE: 549 return bdev_nvme_writev(nbdev, 550 ch, 551 nbdev_io, 552 bdev_io->u.bdev.iovs, 553 bdev_io->u.bdev.iovcnt, 554 bdev_io->u.bdev.md_buf, 555 bdev_io->u.bdev.num_blocks, 556 bdev_io->u.bdev.offset_blocks); 557 558 case SPDK_BDEV_IO_TYPE_COMPARE: 559 return bdev_nvme_comparev(nbdev, 560 ch, 561 nbdev_io, 562 bdev_io->u.bdev.iovs, 563 bdev_io->u.bdev.iovcnt, 564 bdev_io->u.bdev.md_buf, 565 bdev_io->u.bdev.num_blocks, 566 bdev_io->u.bdev.offset_blocks); 567 568 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 569 return bdev_nvme_comparev_and_writev(nbdev, 570 ch, 571 nbdev_io, 572 bdev_io->u.bdev.iovs, 573 bdev_io->u.bdev.iovcnt, 574 bdev_io->u.bdev.fused_iovs, 575 bdev_io->u.bdev.fused_iovcnt, 576 bdev_io->u.bdev.md_buf, 577 bdev_io->u.bdev.num_blocks, 578 bdev_io->u.bdev.offset_blocks); 579 580 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 581 return bdev_nvme_unmap(nbdev, 582 ch, 583 nbdev_io, 584 bdev_io->u.bdev.offset_blocks, 585 bdev_io->u.bdev.num_blocks); 586 587 case SPDK_BDEV_IO_TYPE_UNMAP: 588 return bdev_nvme_unmap(nbdev, 589 ch, 590 nbdev_io, 591 bdev_io->u.bdev.offset_blocks, 592 bdev_io->u.bdev.num_blocks); 593 594 case SPDK_BDEV_IO_TYPE_RESET: 595 return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io); 596 597 case SPDK_BDEV_IO_TYPE_FLUSH: 598 return bdev_nvme_flush(nbdev, 599 nbdev_io, 600 bdev_io->u.bdev.offset_blocks, 601 bdev_io->u.bdev.num_blocks); 602 603 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 604 return bdev_nvme_admin_passthru(nbdev, 605 ch, 606 nbdev_io, 607 &bdev_io->u.nvme_passthru.cmd, 608 bdev_io->u.nvme_passthru.buf, 609 bdev_io->u.nvme_passthru.nbytes); 610 611 case SPDK_BDEV_IO_TYPE_NVME_IO: 612 return bdev_nvme_io_passthru(nbdev, 613 ch, 614 nbdev_io, 615 &bdev_io->u.nvme_passthru.cmd, 616 bdev_io->u.nvme_passthru.buf, 617 bdev_io->u.nvme_passthru.nbytes); 618 619 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 620 return bdev_nvme_io_passthru_md(nbdev, 621 ch, 622 nbdev_io, 623 &bdev_io->u.nvme_passthru.cmd, 624 bdev_io->u.nvme_passthru.buf, 625 bdev_io->u.nvme_passthru.nbytes, 626 bdev_io->u.nvme_passthru.md_buf, 627 bdev_io->u.nvme_passthru.md_len); 628 629 default: 630 return -EINVAL; 631 } 632 return 0; 633 } 634 635 static void 636 bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 637 { 638 int rc = _bdev_nvme_submit_request(ch, bdev_io); 639 640 if (spdk_unlikely(rc != 0)) { 641 if (rc == -ENOMEM) { 642 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 643 } else { 644 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 645 } 646 } 647 } 648 649 static bool 650 bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 651 { 652 struct nvme_bdev *nbdev = ctx; 653 const struct spdk_nvme_ctrlr_data *cdata; 654 655 switch (io_type) { 656 case SPDK_BDEV_IO_TYPE_READ: 657 case SPDK_BDEV_IO_TYPE_WRITE: 658 case SPDK_BDEV_IO_TYPE_RESET: 659 case SPDK_BDEV_IO_TYPE_FLUSH: 660 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 661 case SPDK_BDEV_IO_TYPE_NVME_IO: 662 return true; 663 664 case SPDK_BDEV_IO_TYPE_COMPARE: 665 return spdk_nvme_ns_supports_compare(nbdev->nvme_ns->ns); 666 667 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 668 return spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns) ? true : false; 669 670 case SPDK_BDEV_IO_TYPE_UNMAP: 671 cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr); 672 return cdata->oncs.dsm; 673 674 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 675 cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr); 676 /* 677 * If an NVMe controller guarantees reading unallocated blocks returns zero, 678 * we can implement WRITE_ZEROES as an NVMe deallocate command. 679 */ 680 if (cdata->oncs.dsm && 681 spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->nvme_ns->ns) == 682 SPDK_NVME_DEALLOC_READ_00) { 683 return true; 684 } 685 /* 686 * The NVMe controller write_zeroes function is currently not used by our driver. 687 * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail. 688 * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read. 689 */ 690 return false; 691 692 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 693 if (spdk_nvme_ctrlr_get_flags(nbdev->nvme_bdev_ctrlr->ctrlr) & 694 SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) { 695 return true; 696 } 697 return false; 698 699 default: 700 return false; 701 } 702 } 703 704 static int 705 bdev_nvme_create_cb(void *io_device, void *ctx_buf) 706 { 707 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 708 struct nvme_io_channel *ch = ctx_buf; 709 struct spdk_nvme_io_qpair_opts opts; 710 struct spdk_io_channel *pg_ch = NULL; 711 int rc; 712 713 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 714 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 715 opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests); 716 opts.create_only = true; 717 g_opts.io_queue_requests = opts.io_queue_requests; 718 719 ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 720 721 if (ch->qpair == NULL) { 722 return -1; 723 } 724 725 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 726 if (bdev_ocssd_create_io_channel(ch)) { 727 goto err; 728 } 729 } 730 731 pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs); 732 if (!pg_ch) { 733 goto err; 734 } 735 736 ch->group = spdk_io_channel_get_ctx(pg_ch); 737 if (spdk_nvme_poll_group_add(ch->group->group, ch->qpair) != 0) { 738 goto err; 739 } 740 741 rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, ch->qpair); 742 if (rc) { 743 spdk_nvme_poll_group_remove(ch->group->group, ch->qpair); 744 goto err; 745 } 746 747 #ifdef SPDK_CONFIG_VTUNE 748 ch->group->collect_spin_stat = true; 749 #else 750 ch->group->collect_spin_stat = false; 751 #endif 752 753 TAILQ_INIT(&ch->pending_resets); 754 return 0; 755 756 err: 757 if (pg_ch) { 758 spdk_put_io_channel(pg_ch); 759 } 760 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 761 return -1; 762 } 763 764 static void 765 bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) 766 { 767 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 768 struct nvme_io_channel *ch = ctx_buf; 769 struct nvme_bdev_poll_group *group; 770 771 group = ch->group; 772 assert(group != NULL); 773 774 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 775 bdev_ocssd_destroy_io_channel(ch); 776 } 777 778 if (ch->qpair != NULL) { 779 spdk_nvme_poll_group_remove(group->group, ch->qpair); 780 } 781 spdk_put_io_channel(spdk_io_channel_from_ctx(group)); 782 783 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 784 } 785 786 static int 787 bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf) 788 { 789 struct nvme_bdev_poll_group *group = ctx_buf; 790 791 group->group = spdk_nvme_poll_group_create(group); 792 if (group->group == NULL) { 793 return -1; 794 } 795 796 group->poller = spdk_poller_register(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); 797 798 if (group->poller == NULL) { 799 spdk_nvme_poll_group_destroy(group->group); 800 return -1; 801 } 802 803 return 0; 804 } 805 806 static void 807 bdev_nvme_poll_group_destroy_cb(void *io_device, void *ctx_buf) 808 { 809 struct nvme_bdev_poll_group *group = ctx_buf; 810 811 spdk_poller_unregister(&group->poller); 812 if (spdk_nvme_poll_group_destroy(group->group)) { 813 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module."); 814 assert(false); 815 } 816 } 817 818 static struct spdk_io_channel * 819 bdev_nvme_get_io_channel(void *ctx) 820 { 821 struct nvme_bdev *nvme_bdev = ctx; 822 823 return spdk_get_io_channel(nvme_bdev->nvme_bdev_ctrlr); 824 } 825 826 static int 827 bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 828 { 829 struct nvme_bdev *nvme_bdev = ctx; 830 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr; 831 const struct spdk_nvme_ctrlr_data *cdata; 832 struct spdk_nvme_ns *ns; 833 union spdk_nvme_vs_register vs; 834 union spdk_nvme_csts_register csts; 835 char buf[128]; 836 837 cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 838 vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 839 csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 840 ns = nvme_bdev->nvme_ns->ns; 841 842 spdk_json_write_named_object_begin(w, "nvme"); 843 844 if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 845 spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->trid.traddr); 846 } 847 848 spdk_json_write_named_object_begin(w, "trid"); 849 850 nvme_bdev_dump_trid_json(&nvme_bdev_ctrlr->trid, w); 851 852 spdk_json_write_object_end(w); 853 854 #ifdef SPDK_CONFIG_NVME_CUSE 855 size_t cuse_name_size = 128; 856 char cuse_name[cuse_name_size]; 857 858 int rc = spdk_nvme_cuse_get_ns_name(nvme_bdev->nvme_bdev_ctrlr->ctrlr, spdk_nvme_ns_get_id(ns), 859 cuse_name, &cuse_name_size); 860 if (rc == 0) { 861 spdk_json_write_named_string(w, "cuse_device", cuse_name); 862 } 863 #endif 864 865 spdk_json_write_named_object_begin(w, "ctrlr_data"); 866 867 spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid); 868 869 snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn); 870 spdk_str_trim(buf); 871 spdk_json_write_named_string(w, "model_number", buf); 872 873 snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn); 874 spdk_str_trim(buf); 875 spdk_json_write_named_string(w, "serial_number", buf); 876 877 snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr); 878 spdk_str_trim(buf); 879 spdk_json_write_named_string(w, "firmware_revision", buf); 880 881 spdk_json_write_named_object_begin(w, "oacs"); 882 883 spdk_json_write_named_uint32(w, "security", cdata->oacs.security); 884 spdk_json_write_named_uint32(w, "format", cdata->oacs.format); 885 spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware); 886 spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage); 887 888 spdk_json_write_object_end(w); 889 890 spdk_json_write_object_end(w); 891 892 spdk_json_write_named_object_begin(w, "vs"); 893 894 spdk_json_write_name(w, "nvme_version"); 895 if (vs.bits.ter) { 896 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter); 897 } else { 898 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr); 899 } 900 901 spdk_json_write_object_end(w); 902 903 spdk_json_write_named_object_begin(w, "csts"); 904 905 spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy); 906 spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs); 907 908 spdk_json_write_object_end(w); 909 910 spdk_json_write_named_object_begin(w, "ns_data"); 911 912 spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns)); 913 914 spdk_json_write_object_end(w); 915 916 if (cdata->oacs.security) { 917 spdk_json_write_named_object_begin(w, "security"); 918 919 spdk_json_write_named_bool(w, "opal", nvme_bdev_ctrlr->opal_dev ? true : false); 920 921 spdk_json_write_object_end(w); 922 } 923 924 spdk_json_write_object_end(w); 925 926 return 0; 927 } 928 929 static void 930 bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 931 { 932 /* No config per bdev needed */ 933 } 934 935 static uint64_t 936 bdev_nvme_get_spin_time(struct spdk_io_channel *ch) 937 { 938 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 939 struct nvme_bdev_poll_group *group = nvme_ch->group; 940 uint64_t spin_time; 941 942 if (!group || !group->collect_spin_stat) { 943 return 0; 944 } 945 946 if (group->end_ticks != 0) { 947 group->spin_ticks += (group->end_ticks - group->start_ticks); 948 group->end_ticks = 0; 949 } 950 951 spin_time = (group->spin_ticks * 1000000ULL) / spdk_get_ticks_hz(); 952 group->start_ticks = 0; 953 group->spin_ticks = 0; 954 955 return spin_time; 956 } 957 958 static const struct spdk_bdev_fn_table nvmelib_fn_table = { 959 .destruct = bdev_nvme_destruct, 960 .submit_request = bdev_nvme_submit_request, 961 .io_type_supported = bdev_nvme_io_type_supported, 962 .get_io_channel = bdev_nvme_get_io_channel, 963 .dump_info_json = bdev_nvme_dump_info_json, 964 .write_config_json = bdev_nvme_write_config_json, 965 .get_spin_time = bdev_nvme_get_spin_time, 966 }; 967 968 static void 969 nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 970 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx) 971 { 972 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 973 struct nvme_bdev *bdev; 974 struct spdk_nvme_ns *ns; 975 const struct spdk_uuid *uuid; 976 const struct spdk_nvme_ctrlr_data *cdata; 977 const struct spdk_nvme_ns_data *nsdata; 978 int rc; 979 980 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 981 982 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id); 983 if (!ns) { 984 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nvme_ns->id); 985 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -EINVAL); 986 return; 987 } 988 989 bdev = calloc(1, sizeof(*bdev)); 990 if (!bdev) { 991 SPDK_ERRLOG("bdev calloc() failed\n"); 992 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 993 return; 994 } 995 996 bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr; 997 nvme_ns->ns = ns; 998 bdev->nvme_ns = nvme_ns; 999 1000 bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns)); 1001 if (!bdev->disk.name) { 1002 free(bdev); 1003 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 1004 return; 1005 } 1006 bdev->disk.product_name = "NVMe disk"; 1007 1008 bdev->disk.write_cache = 0; 1009 if (cdata->vwc.present) { 1010 /* Enable if the Volatile Write Cache exists */ 1011 bdev->disk.write_cache = 1; 1012 } 1013 bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns); 1014 bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns); 1015 bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); 1016 1017 uuid = spdk_nvme_ns_get_uuid(ns); 1018 if (uuid != NULL) { 1019 bdev->disk.uuid = *uuid; 1020 } 1021 1022 nsdata = spdk_nvme_ns_get_data(ns); 1023 1024 bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns); 1025 if (bdev->disk.md_len != 0) { 1026 bdev->disk.md_interleave = nsdata->flbas.extended; 1027 bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns); 1028 if (bdev->disk.dif_type != SPDK_DIF_DISABLE) { 1029 bdev->disk.dif_is_head_of_md = nsdata->dps.md_start; 1030 bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags; 1031 } 1032 } 1033 1034 if (!bdev_nvme_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) { 1035 bdev->disk.acwu = 0; 1036 } else if (nsdata->nsfeat.ns_atomic_write_unit) { 1037 bdev->disk.acwu = nsdata->nacwu; 1038 } else { 1039 bdev->disk.acwu = cdata->acwu; 1040 } 1041 1042 bdev->disk.ctxt = bdev; 1043 bdev->disk.fn_table = &nvmelib_fn_table; 1044 bdev->disk.module = &nvme_if; 1045 rc = spdk_bdev_register(&bdev->disk); 1046 if (rc) { 1047 free(bdev->disk.name); 1048 free(bdev); 1049 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, rc); 1050 return; 1051 } 1052 1053 nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev); 1054 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0); 1055 } 1056 1057 static bool 1058 hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1059 struct spdk_nvme_ctrlr_opts *opts) 1060 { 1061 struct nvme_probe_skip_entry *entry; 1062 1063 TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) { 1064 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1065 return false; 1066 } 1067 } 1068 1069 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1070 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1071 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1072 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1073 1074 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr); 1075 1076 return true; 1077 } 1078 1079 static bool 1080 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1081 struct spdk_nvme_ctrlr_opts *opts) 1082 { 1083 struct nvme_probe_ctx *ctx = cb_ctx; 1084 1085 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr); 1086 1087 if (nvme_bdev_ctrlr_get(trid)) { 1088 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", 1089 trid->traddr); 1090 return false; 1091 } 1092 1093 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1094 bool claim_device = false; 1095 size_t i; 1096 1097 for (i = 0; i < ctx->count; i++) { 1098 if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { 1099 claim_device = true; 1100 break; 1101 } 1102 } 1103 1104 if (!claim_device) { 1105 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr); 1106 return false; 1107 } 1108 } 1109 1110 if (ctx->hostnqn) { 1111 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); 1112 } 1113 1114 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1115 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1116 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1117 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1118 1119 return true; 1120 } 1121 1122 static void 1123 nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) 1124 { 1125 struct spdk_nvme_ctrlr *ctrlr = ctx; 1126 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1127 1128 if (spdk_nvme_cpl_is_error(cpl)) { 1129 SPDK_WARNLOG("Abort failed. Resetting controller.\n"); 1130 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1131 assert(nvme_bdev_ctrlr != NULL); 1132 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1133 } 1134 } 1135 1136 static void 1137 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, 1138 struct spdk_nvme_qpair *qpair, uint16_t cid) 1139 { 1140 int rc; 1141 union spdk_nvme_csts_register csts; 1142 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1143 1144 SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); 1145 1146 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); 1147 if (csts.bits.cfs) { 1148 SPDK_ERRLOG("Controller Fatal Status, reset required\n"); 1149 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1150 assert(nvme_bdev_ctrlr != NULL); 1151 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1152 return; 1153 } 1154 1155 switch (g_opts.action_on_timeout) { 1156 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 1157 if (qpair) { 1158 rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, 1159 nvme_abort_cpl, ctrlr); 1160 if (rc == 0) { 1161 return; 1162 } 1163 1164 SPDK_ERRLOG("Unable to send abort. Resetting.\n"); 1165 } 1166 1167 /* FALLTHROUGH */ 1168 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 1169 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1170 assert(nvme_bdev_ctrlr != NULL); 1171 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1172 break; 1173 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 1174 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n"); 1175 break; 1176 default: 1177 SPDK_ERRLOG("An invalid timeout action value is found.\n"); 1178 break; 1179 } 1180 } 1181 1182 void 1183 nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) 1184 { 1185 pthread_mutex_lock(&g_bdev_nvme_mutex); 1186 nvme_bdev_ctrlr->ref--; 1187 1188 if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) { 1189 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1190 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1191 return; 1192 } 1193 1194 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1195 } 1196 1197 static void 1198 nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns) 1199 { 1200 struct nvme_bdev *bdev, *tmp; 1201 1202 TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { 1203 spdk_bdev_unregister(&bdev->disk, NULL, NULL); 1204 } 1205 1206 ns->populated = false; 1207 1208 nvme_ctrlr_depopulate_namespace_done(ns->ctrlr); 1209 } 1210 1211 static void nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, 1212 struct nvme_async_probe_ctx *ctx) 1213 { 1214 g_populate_namespace_fn[ns->type](ctrlr, ns, ctx); 1215 } 1216 1217 static void nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns) 1218 { 1219 g_depopulate_namespace_fn[ns->type](ns); 1220 } 1221 1222 void 1223 nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx, 1224 struct nvme_bdev_ns *ns, int rc) 1225 { 1226 if (rc == 0) { 1227 ns->populated = true; 1228 pthread_mutex_lock(&g_bdev_nvme_mutex); 1229 ns->ctrlr->ref++; 1230 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1231 } else { 1232 memset(ns, 0, sizeof(*ns)); 1233 } 1234 1235 if (ctx) { 1236 ctx->populates_in_progress--; 1237 if (ctx->populates_in_progress == 0) { 1238 nvme_ctrlr_populate_namespaces_done(ctx); 1239 } 1240 } 1241 } 1242 1243 static void 1244 nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 1245 struct nvme_async_probe_ctx *ctx) 1246 { 1247 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 1248 struct nvme_bdev_ns *ns; 1249 struct spdk_nvme_ns *nvme_ns; 1250 struct nvme_bdev *bdev; 1251 uint32_t i; 1252 int rc; 1253 uint64_t num_sectors; 1254 bool ns_is_active; 1255 1256 if (ctx) { 1257 /* Initialize this count to 1 to handle the populate functions 1258 * calling nvme_ctrlr_populate_namespace_done() immediately. 1259 */ 1260 ctx->populates_in_progress = 1; 1261 } 1262 1263 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1264 uint32_t nsid = i + 1; 1265 1266 ns = nvme_bdev_ctrlr->namespaces[i]; 1267 ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid); 1268 1269 if (ns->populated && ns_is_active && ns->type == NVME_BDEV_NS_STANDARD) { 1270 /* NS is still there but attributes may have changed */ 1271 nvme_ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 1272 num_sectors = spdk_nvme_ns_get_num_sectors(nvme_ns); 1273 bdev = TAILQ_FIRST(&ns->bdevs); 1274 if (bdev->disk.blockcnt != num_sectors) { 1275 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %lu, new size %lu\n", 1276 nsid, 1277 bdev->disk.name, 1278 bdev->disk.blockcnt, 1279 num_sectors); 1280 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors); 1281 if (rc != 0) { 1282 SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n", 1283 bdev->disk.name, rc); 1284 } 1285 } 1286 } 1287 1288 if (!ns->populated && ns_is_active) { 1289 ns->id = nsid; 1290 ns->ctrlr = nvme_bdev_ctrlr; 1291 if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { 1292 ns->type = NVME_BDEV_NS_OCSSD; 1293 } else { 1294 ns->type = NVME_BDEV_NS_STANDARD; 1295 } 1296 1297 TAILQ_INIT(&ns->bdevs); 1298 1299 if (ctx) { 1300 ctx->populates_in_progress++; 1301 } 1302 nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx); 1303 } 1304 1305 if (ns->populated && !ns_is_active) { 1306 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1307 } 1308 } 1309 1310 if (ctx) { 1311 /* Decrement this count now that the loop is over to account 1312 * for the one we started with. If the count is then 0, we 1313 * know any populate_namespace functions completed immediately, 1314 * so we'll kick the callback here. 1315 */ 1316 ctx->populates_in_progress--; 1317 if (ctx->populates_in_progress == 0) { 1318 nvme_ctrlr_populate_namespaces_done(ctx); 1319 } 1320 } 1321 1322 } 1323 1324 static void 1325 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) 1326 { 1327 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg; 1328 union spdk_nvme_async_event_completion event; 1329 1330 if (spdk_nvme_cpl_is_error(cpl)) { 1331 SPDK_WARNLOG("AER request execute failed"); 1332 return; 1333 } 1334 1335 event.raw = cpl->cdw0; 1336 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 1337 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 1338 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1339 } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR) && 1340 (event.bits.log_page_identifier == SPDK_OCSSD_LOG_CHUNK_NOTIFICATION) && 1341 spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1342 bdev_ocssd_handle_chunk_notification(nvme_bdev_ctrlr); 1343 } 1344 } 1345 1346 static int 1347 create_ctrlr(struct spdk_nvme_ctrlr *ctrlr, 1348 const char *name, 1349 const struct spdk_nvme_transport_id *trid, 1350 uint32_t prchk_flags) 1351 { 1352 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1353 uint32_t i; 1354 int rc; 1355 1356 nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr)); 1357 if (nvme_bdev_ctrlr == NULL) { 1358 SPDK_ERRLOG("Failed to allocate device struct\n"); 1359 return -ENOMEM; 1360 } 1361 nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 1362 nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *)); 1363 if (!nvme_bdev_ctrlr->namespaces) { 1364 SPDK_ERRLOG("Failed to allocate block namespaces pointer\n"); 1365 free(nvme_bdev_ctrlr); 1366 return -ENOMEM; 1367 } 1368 1369 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1370 nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns)); 1371 if (nvme_bdev_ctrlr->namespaces[i] == NULL) { 1372 SPDK_ERRLOG("Failed to allocate block namespace struct\n"); 1373 for (; i > 0; i--) { 1374 free(nvme_bdev_ctrlr->namespaces[i - 1]); 1375 } 1376 free(nvme_bdev_ctrlr->namespaces); 1377 free(nvme_bdev_ctrlr); 1378 return -ENOMEM; 1379 } 1380 } 1381 1382 nvme_bdev_ctrlr->adminq_timer_poller = NULL; 1383 nvme_bdev_ctrlr->ctrlr = ctrlr; 1384 nvme_bdev_ctrlr->ref = 0; 1385 nvme_bdev_ctrlr->trid = *trid; 1386 nvme_bdev_ctrlr->name = strdup(name); 1387 if (nvme_bdev_ctrlr->name == NULL) { 1388 free(nvme_bdev_ctrlr->namespaces); 1389 free(nvme_bdev_ctrlr); 1390 return -ENOMEM; 1391 } 1392 1393 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1394 rc = bdev_ocssd_init_ctrlr(nvme_bdev_ctrlr); 1395 if (spdk_unlikely(rc != 0)) { 1396 SPDK_ERRLOG("Unable to initialize OCSSD controller\n"); 1397 free(nvme_bdev_ctrlr->name); 1398 free(nvme_bdev_ctrlr->namespaces); 1399 free(nvme_bdev_ctrlr); 1400 return rc; 1401 } 1402 } 1403 1404 nvme_bdev_ctrlr->prchk_flags = prchk_flags; 1405 1406 spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb, 1407 sizeof(struct nvme_io_channel), 1408 name); 1409 1410 nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, ctrlr, 1411 g_opts.nvme_adminq_poll_period_us); 1412 1413 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq); 1414 1415 if (g_opts.timeout_us > 0) { 1416 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, 1417 timeout_cb, NULL); 1418 } 1419 1420 spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr); 1421 1422 if (spdk_nvme_ctrlr_get_flags(nvme_bdev_ctrlr->ctrlr) & 1423 SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { 1424 nvme_bdev_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_bdev_ctrlr->ctrlr); 1425 if (nvme_bdev_ctrlr->opal_dev == NULL) { 1426 SPDK_ERRLOG("Failed to initialize Opal\n"); 1427 } 1428 } 1429 return 0; 1430 } 1431 1432 static void 1433 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1434 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1435 { 1436 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1437 struct nvme_probe_ctx *ctx = cb_ctx; 1438 char *name = NULL; 1439 uint32_t prchk_flags = 0; 1440 size_t i; 1441 1442 if (ctx) { 1443 for (i = 0; i < ctx->count; i++) { 1444 if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { 1445 prchk_flags = ctx->prchk_flags[i]; 1446 name = strdup(ctx->names[i]); 1447 break; 1448 } 1449 } 1450 } else { 1451 name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); 1452 } 1453 if (!name) { 1454 SPDK_ERRLOG("Failed to assign name to NVMe device\n"); 1455 return; 1456 } 1457 1458 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name); 1459 1460 create_ctrlr(ctrlr, name, trid, prchk_flags); 1461 1462 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); 1463 if (!nvme_bdev_ctrlr) { 1464 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 1465 free(name); 1466 return; 1467 } 1468 1469 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1470 1471 free(name); 1472 } 1473 1474 static void 1475 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) 1476 { 1477 uint32_t i; 1478 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1479 struct nvme_bdev_ns *ns; 1480 1481 pthread_mutex_lock(&g_bdev_nvme_mutex); 1482 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 1483 if (nvme_bdev_ctrlr->ctrlr == ctrlr) { 1484 /* The controller's destruction was already started */ 1485 if (nvme_bdev_ctrlr->destruct) { 1486 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1487 return; 1488 } 1489 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1490 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1491 uint32_t nsid = i + 1; 1492 1493 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1494 if (ns->populated) { 1495 assert(ns->id == nsid); 1496 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1497 } 1498 } 1499 1500 pthread_mutex_lock(&g_bdev_nvme_mutex); 1501 nvme_bdev_ctrlr->destruct = true; 1502 if (nvme_bdev_ctrlr->ref == 0) { 1503 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1504 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1505 } else { 1506 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1507 } 1508 return; 1509 } 1510 } 1511 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1512 } 1513 1514 static int 1515 bdev_nvme_hotplug(void *arg) 1516 { 1517 struct spdk_nvme_transport_id trid_pcie; 1518 int done; 1519 1520 if (!g_hotplug_probe_ctx) { 1521 memset(&trid_pcie, 0, sizeof(trid_pcie)); 1522 spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE); 1523 1524 g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL, 1525 hotplug_probe_cb, 1526 attach_cb, remove_cb); 1527 if (!g_hotplug_probe_ctx) { 1528 return -1; 1529 } 1530 } 1531 1532 done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx); 1533 if (done != -EAGAIN) { 1534 g_hotplug_probe_ctx = NULL; 1535 return 1; 1536 } 1537 1538 return -1; 1539 } 1540 1541 void 1542 bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) 1543 { 1544 *opts = g_opts; 1545 } 1546 1547 int 1548 bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) 1549 { 1550 if (g_bdev_nvme_init_thread != NULL) { 1551 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 1552 return -EPERM; 1553 } 1554 } 1555 1556 g_opts = *opts; 1557 1558 return 0; 1559 } 1560 1561 struct set_nvme_hotplug_ctx { 1562 uint64_t period_us; 1563 bool enabled; 1564 spdk_msg_fn fn; 1565 void *fn_ctx; 1566 }; 1567 1568 static void 1569 set_nvme_hotplug_period_cb(void *_ctx) 1570 { 1571 struct set_nvme_hotplug_ctx *ctx = _ctx; 1572 1573 spdk_poller_unregister(&g_hotplug_poller); 1574 if (ctx->enabled) { 1575 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us); 1576 } 1577 1578 g_nvme_hotplug_poll_period_us = ctx->period_us; 1579 g_nvme_hotplug_enabled = ctx->enabled; 1580 if (ctx->fn) { 1581 ctx->fn(ctx->fn_ctx); 1582 } 1583 1584 free(ctx); 1585 } 1586 1587 int 1588 bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx) 1589 { 1590 struct set_nvme_hotplug_ctx *ctx; 1591 1592 if (enabled == true && !spdk_process_is_primary()) { 1593 return -EPERM; 1594 } 1595 1596 ctx = calloc(1, sizeof(*ctx)); 1597 if (ctx == NULL) { 1598 return -ENOMEM; 1599 } 1600 1601 period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; 1602 ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); 1603 ctx->enabled = enabled; 1604 ctx->fn = cb; 1605 ctx->fn_ctx = cb_ctx; 1606 1607 spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); 1608 return 0; 1609 } 1610 1611 static void 1612 populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc) 1613 { 1614 if (ctx->cb_fn) { 1615 ctx->cb_fn(ctx->cb_ctx, count, rc); 1616 } 1617 1618 free(ctx); 1619 } 1620 1621 static void 1622 nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx) 1623 { 1624 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1625 struct nvme_bdev_ns *ns; 1626 struct nvme_bdev *nvme_bdev, *tmp; 1627 uint32_t i, nsid; 1628 size_t j; 1629 1630 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid); 1631 assert(nvme_bdev_ctrlr != NULL); 1632 1633 /* 1634 * Report the new bdevs that were created in this call. 1635 * There can be more than one bdev per NVMe controller. 1636 */ 1637 j = 0; 1638 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1639 nsid = i + 1; 1640 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1641 if (!ns->populated) { 1642 continue; 1643 } 1644 assert(ns->id == nsid); 1645 TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { 1646 if (j < ctx->count) { 1647 ctx->names[j] = nvme_bdev->disk.name; 1648 j++; 1649 } else { 1650 SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n", 1651 ctx->count); 1652 populate_namespaces_cb(ctx, 0, -ERANGE); 1653 return; 1654 } 1655 } 1656 } 1657 1658 populate_namespaces_cb(ctx, j, 0); 1659 } 1660 1661 static void 1662 connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1663 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1664 { 1665 struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx; 1666 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1667 struct nvme_async_probe_ctx *ctx; 1668 int rc; 1669 1670 ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); 1671 1672 spdk_poller_unregister(&ctx->poller); 1673 1674 rc = create_ctrlr(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags); 1675 if (rc) { 1676 SPDK_ERRLOG("Failed to create new device\n"); 1677 populate_namespaces_cb(ctx, 0, rc); 1678 return; 1679 } 1680 1681 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid); 1682 assert(nvme_bdev_ctrlr != NULL); 1683 1684 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, ctx); 1685 } 1686 1687 static int 1688 bdev_nvme_async_poll(void *arg) 1689 { 1690 struct nvme_async_probe_ctx *ctx = arg; 1691 int rc; 1692 1693 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx); 1694 if (spdk_unlikely(rc != -EAGAIN && rc != 0)) { 1695 spdk_poller_unregister(&ctx->poller); 1696 free(ctx); 1697 } 1698 1699 return 1; 1700 } 1701 1702 int 1703 bdev_nvme_create(struct spdk_nvme_transport_id *trid, 1704 struct spdk_nvme_host_id *hostid, 1705 const char *base_name, 1706 const char **names, 1707 uint32_t count, 1708 const char *hostnqn, 1709 uint32_t prchk_flags, 1710 spdk_bdev_create_nvme_fn cb_fn, 1711 void *cb_ctx) 1712 { 1713 struct nvme_probe_skip_entry *entry, *tmp; 1714 struct nvme_async_probe_ctx *ctx; 1715 1716 if (nvme_bdev_ctrlr_get(trid) != NULL) { 1717 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr); 1718 return -EEXIST; 1719 } 1720 1721 if (nvme_bdev_ctrlr_get_by_name(base_name)) { 1722 SPDK_ERRLOG("A controller with the provided name (%s) already exists.\n", base_name); 1723 return -EEXIST; 1724 } 1725 1726 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1727 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) { 1728 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1729 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 1730 free(entry); 1731 break; 1732 } 1733 } 1734 } 1735 1736 ctx = calloc(1, sizeof(*ctx)); 1737 if (!ctx) { 1738 return -ENOMEM; 1739 } 1740 ctx->base_name = base_name; 1741 ctx->names = names; 1742 ctx->count = count; 1743 ctx->cb_fn = cb_fn; 1744 ctx->cb_ctx = cb_ctx; 1745 ctx->prchk_flags = prchk_flags; 1746 ctx->trid = *trid; 1747 1748 spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts)); 1749 ctx->opts.transport_retry_count = g_opts.retry_count; 1750 1751 if (hostnqn) { 1752 snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn); 1753 } 1754 1755 if (hostid->hostaddr[0] != '\0') { 1756 snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr); 1757 } 1758 1759 if (hostid->hostsvcid[0] != '\0') { 1760 snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid); 1761 } 1762 1763 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb); 1764 if (ctx->probe_ctx == NULL) { 1765 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr); 1766 free(ctx); 1767 return -ENODEV; 1768 } 1769 ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000); 1770 1771 return 0; 1772 } 1773 1774 int 1775 bdev_nvme_delete(const char *name) 1776 { 1777 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; 1778 struct nvme_probe_skip_entry *entry; 1779 1780 if (name == NULL) { 1781 return -EINVAL; 1782 } 1783 1784 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name); 1785 if (nvme_bdev_ctrlr == NULL) { 1786 SPDK_ERRLOG("Failed to find NVMe controller\n"); 1787 return -ENODEV; 1788 } 1789 1790 if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 1791 entry = calloc(1, sizeof(*entry)); 1792 if (!entry) { 1793 return -ENOMEM; 1794 } 1795 entry->trid = nvme_bdev_ctrlr->trid; 1796 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq); 1797 } 1798 1799 remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); 1800 return 0; 1801 } 1802 1803 static int 1804 bdev_nvme_library_init(void) 1805 { 1806 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1807 struct spdk_conf_section *sp; 1808 const char *val; 1809 int rc = 0; 1810 int64_t intval = 0; 1811 size_t i; 1812 struct nvme_probe_ctx *probe_ctx = NULL; 1813 int retry_count; 1814 uint32_t local_nvme_num = 0; 1815 int64_t hotplug_period; 1816 bool hotplug_enabled = g_nvme_hotplug_enabled; 1817 1818 g_bdev_nvme_init_thread = spdk_get_thread(); 1819 1820 spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_poll_group_create_cb, 1821 bdev_nvme_poll_group_destroy_cb, 1822 sizeof(struct nvme_bdev_poll_group), "bdev_nvme_poll_groups"); 1823 1824 sp = spdk_conf_find_section(NULL, "Nvme"); 1825 if (sp == NULL) { 1826 goto end; 1827 } 1828 1829 probe_ctx = calloc(1, sizeof(*probe_ctx)); 1830 if (probe_ctx == NULL) { 1831 SPDK_ERRLOG("Failed to allocate probe_ctx\n"); 1832 rc = -1; 1833 goto end; 1834 } 1835 1836 retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); 1837 if (retry_count >= 0) { 1838 g_opts.retry_count = retry_count; 1839 } 1840 1841 val = spdk_conf_section_get_val(sp, "TimeoutUsec"); 1842 if (val != NULL) { 1843 intval = spdk_strtoll(val, 10); 1844 if (intval < 0) { 1845 SPDK_ERRLOG("Invalid TimeoutUsec value\n"); 1846 rc = -1; 1847 goto end; 1848 } 1849 } 1850 1851 g_opts.timeout_us = intval; 1852 1853 if (g_opts.timeout_us > 0) { 1854 val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); 1855 if (val != NULL) { 1856 if (!strcasecmp(val, "Reset")) { 1857 g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; 1858 } else if (!strcasecmp(val, "Abort")) { 1859 g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; 1860 } 1861 } 1862 } 1863 1864 intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); 1865 if (intval > 0) { 1866 g_opts.nvme_adminq_poll_period_us = intval; 1867 } 1868 1869 intval = spdk_conf_section_get_intval(sp, "IOPollRate"); 1870 if (intval > 0) { 1871 g_opts.nvme_ioq_poll_period_us = intval; 1872 } 1873 1874 if (spdk_process_is_primary()) { 1875 hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false); 1876 } 1877 1878 hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate"); 1879 if (hotplug_period < 0) { 1880 hotplug_period = 0; 1881 } 1882 1883 g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN"); 1884 probe_ctx->hostnqn = g_nvme_hostnqn; 1885 1886 g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit", 1887 SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT); 1888 1889 for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { 1890 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); 1891 if (val == NULL) { 1892 break; 1893 } 1894 1895 rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); 1896 if (rc < 0) { 1897 SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); 1898 rc = -1; 1899 goto end; 1900 } 1901 1902 rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val); 1903 if (rc < 0) { 1904 SPDK_ERRLOG("Unable to parse HostID: %s\n", val); 1905 rc = -1; 1906 goto end; 1907 } 1908 1909 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); 1910 if (val == NULL) { 1911 SPDK_ERRLOG("No name provided for TransportID\n"); 1912 rc = -1; 1913 goto end; 1914 } 1915 1916 probe_ctx->names[i] = val; 1917 1918 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2); 1919 if (val != NULL) { 1920 rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val); 1921 if (rc < 0) { 1922 SPDK_ERRLOG("Unable to parse prchk: %s\n", val); 1923 rc = -1; 1924 goto end; 1925 } 1926 } 1927 1928 probe_ctx->count++; 1929 1930 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 1931 struct spdk_nvme_ctrlr *ctrlr; 1932 struct spdk_nvme_ctrlr_opts opts; 1933 1934 if (nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 1935 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", 1936 probe_ctx->trids[i].traddr); 1937 rc = -1; 1938 goto end; 1939 } 1940 1941 if (probe_ctx->trids[i].subnqn[0] == '\0') { 1942 SPDK_ERRLOG("Need to provide subsystem nqn\n"); 1943 rc = -1; 1944 goto end; 1945 } 1946 1947 spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); 1948 opts.transport_retry_count = g_opts.retry_count; 1949 1950 if (probe_ctx->hostnqn != NULL) { 1951 snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn); 1952 } 1953 1954 if (probe_ctx->hostids[i].hostaddr[0] != '\0') { 1955 snprintf(opts.src_addr, sizeof(opts.src_addr), "%s", probe_ctx->hostids[i].hostaddr); 1956 } 1957 1958 if (probe_ctx->hostids[i].hostsvcid[0] != '\0') { 1959 snprintf(opts.src_svcid, sizeof(opts.src_svcid), "%s", probe_ctx->hostids[i].hostsvcid); 1960 } 1961 1962 ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts)); 1963 if (ctrlr == NULL) { 1964 SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n", 1965 probe_ctx->trids[i].traddr); 1966 rc = -1; 1967 goto end; 1968 } 1969 1970 rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i], 0); 1971 if (rc) { 1972 goto end; 1973 } 1974 1975 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&probe_ctx->trids[i]); 1976 if (!nvme_bdev_ctrlr) { 1977 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 1978 rc = -ENODEV; 1979 goto end; 1980 } 1981 1982 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1983 } else { 1984 local_nvme_num++; 1985 } 1986 } 1987 1988 if (local_nvme_num > 0) { 1989 /* used to probe local NVMe device */ 1990 if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) { 1991 rc = -1; 1992 goto end; 1993 } 1994 1995 for (i = 0; i < probe_ctx->count; i++) { 1996 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 1997 continue; 1998 } 1999 2000 if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 2001 SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); 2002 SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); 2003 } 2004 } 2005 } 2006 2007 rc = bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL); 2008 if (rc) { 2009 SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc)); 2010 rc = -1; 2011 } 2012 end: 2013 free(probe_ctx); 2014 return rc; 2015 } 2016 2017 static void 2018 bdev_nvme_library_fini(void) 2019 { 2020 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp; 2021 struct nvme_probe_skip_entry *entry, *entry_tmp; 2022 struct nvme_bdev_ns *ns; 2023 uint32_t i; 2024 2025 spdk_poller_unregister(&g_hotplug_poller); 2026 free(g_hotplug_probe_ctx); 2027 2028 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) { 2029 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 2030 free(entry); 2031 } 2032 2033 pthread_mutex_lock(&g_bdev_nvme_mutex); 2034 TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) { 2035 if (nvme_bdev_ctrlr->destruct) { 2036 /* This controller's destruction was already started 2037 * before the application started shutting down 2038 */ 2039 continue; 2040 } 2041 2042 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2043 2044 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 2045 uint32_t nsid = i + 1; 2046 2047 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 2048 if (ns->populated) { 2049 assert(ns->id == nsid); 2050 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 2051 } 2052 } 2053 2054 pthread_mutex_lock(&g_bdev_nvme_mutex); 2055 nvme_bdev_ctrlr->destruct = true; 2056 2057 if (nvme_bdev_ctrlr->ref == 0) { 2058 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2059 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 2060 pthread_mutex_lock(&g_bdev_nvme_mutex); 2061 } 2062 } 2063 2064 g_bdev_nvme_module_finish = true; 2065 if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 2066 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2067 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL); 2068 spdk_bdev_module_finish_done(); 2069 return; 2070 } 2071 2072 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2073 } 2074 2075 static void 2076 bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io) 2077 { 2078 struct spdk_bdev *bdev = bdev_io->bdev; 2079 struct spdk_dif_ctx dif_ctx; 2080 struct spdk_dif_error err_blk = {}; 2081 int rc; 2082 2083 rc = spdk_dif_ctx_init(&dif_ctx, 2084 bdev->blocklen, bdev->md_len, bdev->md_interleave, 2085 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 2086 bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0); 2087 if (rc != 0) { 2088 SPDK_ERRLOG("Initialization of DIF context failed\n"); 2089 return; 2090 } 2091 2092 if (bdev->md_interleave) { 2093 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 2094 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2095 } else { 2096 struct iovec md_iov = { 2097 .iov_base = bdev_io->u.bdev.md_buf, 2098 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 2099 }; 2100 2101 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 2102 &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2103 } 2104 2105 if (rc != 0) { 2106 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2107 err_blk.err_type, err_blk.err_offset); 2108 } else { 2109 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n"); 2110 } 2111 } 2112 2113 static void 2114 bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2115 { 2116 struct nvme_bdev_io *bio = ref; 2117 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2118 2119 if (spdk_nvme_cpl_is_success(cpl)) { 2120 /* Run PI verification for read data buffer. */ 2121 bdev_nvme_verify_pi_error(bdev_io); 2122 } 2123 2124 /* Return original completion status */ 2125 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, 2126 bio->cpl.status.sc); 2127 } 2128 2129 static void 2130 bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2131 { 2132 struct nvme_bdev_io *bio = ref; 2133 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2134 int ret; 2135 2136 if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) { 2137 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n", 2138 cpl->status.sct, cpl->status.sc); 2139 2140 /* Save completion status to use after verifying PI error. */ 2141 bio->cpl = *cpl; 2142 2143 /* Read without PI checking to verify PI error. */ 2144 ret = bdev_nvme_no_pi_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, 2145 spdk_bdev_io_get_io_channel(bdev_io), 2146 bio, 2147 bdev_io->u.bdev.iovs, 2148 bdev_io->u.bdev.iovcnt, 2149 bdev_io->u.bdev.md_buf, 2150 bdev_io->u.bdev.num_blocks, 2151 bdev_io->u.bdev.offset_blocks); 2152 if (ret == 0) { 2153 return; 2154 } 2155 } 2156 2157 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2158 } 2159 2160 static void 2161 bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2162 { 2163 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2164 2165 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2166 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n", 2167 cpl->status.sct, cpl->status.sc); 2168 /* Run PI verification for write data buffer if PI error is detected. */ 2169 bdev_nvme_verify_pi_error(bdev_io); 2170 } 2171 2172 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2173 } 2174 2175 static void 2176 bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2177 { 2178 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2179 2180 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2181 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n", 2182 cpl->status.sct, cpl->status.sc); 2183 /* Run PI verification for compare data buffer if PI error is detected. */ 2184 bdev_nvme_verify_pi_error(bdev_io); 2185 } 2186 2187 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2188 } 2189 2190 static void 2191 bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2192 { 2193 struct nvme_bdev_io *bio = ref; 2194 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2195 2196 /* Compare operation completion */ 2197 if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) { 2198 /* Save compare result for write callback */ 2199 bio->cpl = *cpl; 2200 return; 2201 } 2202 2203 /* Write operation completion */ 2204 if (spdk_nvme_cpl_is_error(&bio->cpl)) { 2205 /* If bio->cpl is already an error, it means the compare operation failed. In that case, 2206 * complete the IO with the compare operation's status. 2207 */ 2208 if (!spdk_nvme_cpl_is_error(cpl)) { 2209 SPDK_ERRLOG("Unexpected write success after compare failure.\n"); 2210 } 2211 2212 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2213 } else { 2214 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2215 } 2216 } 2217 2218 static void 2219 bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) 2220 { 2221 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2222 2223 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2224 } 2225 2226 static void 2227 bdev_nvme_admin_passthru_completion(void *ctx) 2228 { 2229 struct nvme_bdev_io *bio = ctx; 2230 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2231 2232 spdk_bdev_io_complete_nvme_status(bdev_io, 2233 bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2234 } 2235 2236 static void 2237 bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) 2238 { 2239 struct nvme_bdev_io *bio = ref; 2240 2241 bio->cpl = *cpl; 2242 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); 2243 } 2244 2245 static void 2246 bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) 2247 { 2248 struct nvme_bdev_io *bio = ref; 2249 struct iovec *iov; 2250 2251 bio->iov_offset = sgl_offset; 2252 for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { 2253 iov = &bio->iovs[bio->iovpos]; 2254 if (bio->iov_offset < iov->iov_len) { 2255 break; 2256 } 2257 2258 bio->iov_offset -= iov->iov_len; 2259 } 2260 } 2261 2262 static int 2263 bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) 2264 { 2265 struct nvme_bdev_io *bio = ref; 2266 struct iovec *iov; 2267 2268 assert(bio->iovpos < bio->iovcnt); 2269 2270 iov = &bio->iovs[bio->iovpos]; 2271 2272 *address = iov->iov_base; 2273 *length = iov->iov_len; 2274 2275 if (bio->iov_offset) { 2276 assert(bio->iov_offset <= iov->iov_len); 2277 *address += bio->iov_offset; 2278 *length -= bio->iov_offset; 2279 } 2280 2281 bio->iov_offset += *length; 2282 if (bio->iov_offset == iov->iov_len) { 2283 bio->iovpos++; 2284 bio->iov_offset = 0; 2285 } 2286 2287 return 0; 2288 } 2289 2290 static void 2291 bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset) 2292 { 2293 struct nvme_bdev_io *bio = ref; 2294 struct iovec *iov; 2295 2296 bio->fused_iov_offset = sgl_offset; 2297 for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) { 2298 iov = &bio->fused_iovs[bio->fused_iovpos]; 2299 if (bio->fused_iov_offset < iov->iov_len) { 2300 break; 2301 } 2302 2303 bio->fused_iov_offset -= iov->iov_len; 2304 } 2305 } 2306 2307 static int 2308 bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length) 2309 { 2310 struct nvme_bdev_io *bio = ref; 2311 struct iovec *iov; 2312 2313 assert(bio->fused_iovpos < bio->fused_iovcnt); 2314 2315 iov = &bio->fused_iovs[bio->fused_iovpos]; 2316 2317 *address = iov->iov_base; 2318 *length = iov->iov_len; 2319 2320 if (bio->fused_iov_offset) { 2321 assert(bio->fused_iov_offset <= iov->iov_len); 2322 *address += bio->fused_iov_offset; 2323 *length -= bio->fused_iov_offset; 2324 } 2325 2326 bio->fused_iov_offset += *length; 2327 if (bio->fused_iov_offset == iov->iov_len) { 2328 bio->fused_iovpos++; 2329 bio->fused_iov_offset = 0; 2330 } 2331 2332 return 0; 2333 } 2334 2335 static int 2336 bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2337 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2338 void *md, uint64_t lba_count, uint64_t lba) 2339 { 2340 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2341 int rc; 2342 2343 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx without PI check\n", 2344 lba_count, lba); 2345 2346 bio->iovs = iov; 2347 bio->iovcnt = iovcnt; 2348 bio->iovpos = 0; 2349 bio->iov_offset = 0; 2350 2351 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2352 bdev_nvme_no_pi_readv_done, bio, 0, 2353 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2354 md, 0, 0); 2355 2356 if (rc != 0 && rc != -ENOMEM) { 2357 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc); 2358 } 2359 return rc; 2360 } 2361 2362 static int 2363 bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2364 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2365 void *md, uint64_t lba_count, uint64_t lba) 2366 { 2367 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2368 int rc; 2369 2370 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n", 2371 lba_count, lba); 2372 2373 bio->iovs = iov; 2374 bio->iovcnt = iovcnt; 2375 bio->iovpos = 0; 2376 bio->iov_offset = 0; 2377 2378 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2379 bdev_nvme_readv_done, bio, nbdev->disk.dif_check_flags, 2380 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2381 md, 0, 0); 2382 2383 if (rc != 0 && rc != -ENOMEM) { 2384 SPDK_ERRLOG("readv failed: rc = %d\n", rc); 2385 } 2386 return rc; 2387 } 2388 2389 static int 2390 bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2391 struct nvme_bdev_io *bio, 2392 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2393 { 2394 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2395 int rc; 2396 2397 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n", 2398 lba_count, lba); 2399 2400 bio->iovs = iov; 2401 bio->iovcnt = iovcnt; 2402 bio->iovpos = 0; 2403 bio->iov_offset = 0; 2404 2405 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2406 bdev_nvme_writev_done, bio, nbdev->disk.dif_check_flags, 2407 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2408 md, 0, 0); 2409 2410 if (rc != 0 && rc != -ENOMEM) { 2411 SPDK_ERRLOG("writev failed: rc = %d\n", rc); 2412 } 2413 return rc; 2414 } 2415 2416 static int 2417 bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2418 struct nvme_bdev_io *bio, 2419 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2420 { 2421 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2422 int rc; 2423 2424 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare %lu blocks with offset %#lx\n", 2425 lba_count, lba); 2426 2427 bio->iovs = iov; 2428 bio->iovcnt = iovcnt; 2429 bio->iovpos = 0; 2430 bio->iov_offset = 0; 2431 2432 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2433 bdev_nvme_comparev_done, bio, nbdev->disk.dif_check_flags, 2434 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2435 md, 0, 0); 2436 2437 if (rc != 0 && rc != -ENOMEM) { 2438 SPDK_ERRLOG("comparev failed: rc = %d\n", rc); 2439 } 2440 return rc; 2441 } 2442 2443 static int 2444 bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2445 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 2446 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2447 { 2448 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2449 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2450 uint32_t flags = nbdev->disk.dif_check_flags; 2451 int rc; 2452 2453 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare and write %lu blocks with offset %#lx\n", 2454 lba_count, lba); 2455 2456 bio->iovs = cmp_iov; 2457 bio->iovcnt = cmp_iovcnt; 2458 bio->iovpos = 0; 2459 bio->iov_offset = 0; 2460 bio->fused_iovs = write_iov; 2461 bio->fused_iovcnt = write_iovcnt; 2462 bio->fused_iovpos = 0; 2463 bio->fused_iov_offset = 0; 2464 2465 if (bdev_io->num_retries == 0) { 2466 bio->first_fused_submitted = false; 2467 } 2468 2469 if (!bio->first_fused_submitted) { 2470 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2471 memset(&bio->cpl, 0, sizeof(bio->cpl)); 2472 2473 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2474 bdev_nvme_comparev_and_writev_done, bio, flags, 2475 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0); 2476 if (rc == 0) { 2477 bio->first_fused_submitted = true; 2478 flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2479 } else { 2480 if (rc != -ENOMEM) { 2481 SPDK_ERRLOG("compare failed: rc = %d\n", rc); 2482 } 2483 return rc; 2484 } 2485 } 2486 2487 flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND; 2488 2489 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2490 bdev_nvme_comparev_and_writev_done, bio, flags, 2491 bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0); 2492 if (rc != 0 && rc != -ENOMEM) { 2493 SPDK_ERRLOG("write failed: rc = %d\n", rc); 2494 rc = 0; 2495 } 2496 2497 return rc; 2498 } 2499 2500 static int 2501 bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2502 struct nvme_bdev_io *bio, 2503 uint64_t offset_blocks, 2504 uint64_t num_blocks) 2505 { 2506 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2507 struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; 2508 struct spdk_nvme_dsm_range *range; 2509 uint64_t offset, remaining; 2510 uint64_t num_ranges_u64; 2511 uint16_t num_ranges; 2512 int rc; 2513 2514 num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / 2515 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2516 if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { 2517 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); 2518 return -EINVAL; 2519 } 2520 num_ranges = (uint16_t)num_ranges_u64; 2521 2522 offset = offset_blocks; 2523 remaining = num_blocks; 2524 range = &dsm_ranges[0]; 2525 2526 /* Fill max-size ranges until the remaining blocks fit into one range */ 2527 while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { 2528 range->attributes.raw = 0; 2529 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2530 range->starting_lba = offset; 2531 2532 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2533 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2534 range++; 2535 } 2536 2537 /* Final range describes the remaining blocks */ 2538 range->attributes.raw = 0; 2539 range->length = remaining; 2540 range->starting_lba = offset; 2541 2542 rc = spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, 2543 SPDK_NVME_DSM_ATTR_DEALLOCATE, 2544 dsm_ranges, num_ranges, 2545 bdev_nvme_queued_done, bio); 2546 2547 return rc; 2548 } 2549 2550 static int 2551 bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2552 struct nvme_bdev_io *bio, 2553 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2554 { 2555 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2556 2557 if (nbytes > max_xfer_size) { 2558 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2559 return -EINVAL; 2560 } 2561 2562 bio->orig_thread = spdk_io_channel_get_thread(ch); 2563 2564 return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_bdev_ctrlr->ctrlr, cmd, buf, 2565 (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); 2566 } 2567 2568 static int 2569 bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2570 struct nvme_bdev_io *bio, 2571 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2572 { 2573 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2574 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2575 2576 if (nbytes > max_xfer_size) { 2577 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2578 return -EINVAL; 2579 } 2580 2581 /* 2582 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2583 * so fill it out automatically. 2584 */ 2585 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2586 2587 return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2588 (uint32_t)nbytes, bdev_nvme_queued_done, bio); 2589 } 2590 2591 static int 2592 bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2593 struct nvme_bdev_io *bio, 2594 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len) 2595 { 2596 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2597 size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->nvme_ns->ns); 2598 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2599 2600 if (nbytes > max_xfer_size) { 2601 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2602 return -EINVAL; 2603 } 2604 2605 if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns)) { 2606 SPDK_ERRLOG("invalid meta data buffer size\n"); 2607 return -EINVAL; 2608 } 2609 2610 /* 2611 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2612 * so fill it out automatically. 2613 */ 2614 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2615 2616 return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2617 (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio); 2618 } 2619 2620 static void 2621 bdev_nvme_get_spdk_running_config(FILE *fp) 2622 { 2623 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2624 2625 fprintf(fp, "\n[Nvme]"); 2626 fprintf(fp, "\n" 2627 "# NVMe Device Whitelist\n" 2628 "# Users may specify which NVMe devices to claim by their transport id.\n" 2629 "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n" 2630 "# The second argument is the assigned name, which can be referenced from\n" 2631 "# other sections in the configuration file. For NVMe devices, a namespace\n" 2632 "# is automatically appended to each name in the format <YourName>nY, where\n" 2633 "# Y is the NSID (starts at 1).\n"); 2634 2635 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2636 const char *trtype; 2637 const char *prchk_flags; 2638 2639 trtype = spdk_nvme_transport_id_trtype_str(nvme_bdev_ctrlr->trid.trtype); 2640 if (!trtype) { 2641 continue; 2642 } 2643 2644 if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 2645 fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n", 2646 trtype, 2647 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->name); 2648 } else { 2649 const char *adrfam; 2650 2651 adrfam = spdk_nvme_transport_id_adrfam_str(nvme_bdev_ctrlr->trid.adrfam); 2652 prchk_flags = spdk_nvme_prchk_flags_str(nvme_bdev_ctrlr->prchk_flags); 2653 2654 if (adrfam) { 2655 fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2656 trtype, adrfam, 2657 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid, 2658 nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name); 2659 } else { 2660 fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2661 trtype, 2662 nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid, 2663 nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name); 2664 } 2665 2666 if (prchk_flags) { 2667 fprintf(fp, " \"%s\"\n", prchk_flags); 2668 } else { 2669 fprintf(fp, "\n"); 2670 } 2671 } 2672 } 2673 2674 fprintf(fp, "\n" 2675 "# The number of attempts per I/O when an I/O fails. Do not include\n" 2676 "# this key to get the default behavior.\n"); 2677 fprintf(fp, "RetryCount %d\n", g_opts.retry_count); 2678 fprintf(fp, "\n" 2679 "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n"); 2680 fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us); 2681 2682 fprintf(fp, "\n" 2683 "# Action to take on command time out. Only valid when Timeout is greater\n" 2684 "# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort\n" 2685 "# the command, or 'None' to just print a message but do nothing.\n" 2686 "# Admin command timeouts will always result in a reset.\n"); 2687 switch (g_opts.action_on_timeout) { 2688 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 2689 fprintf(fp, "ActionOnTimeout None\n"); 2690 break; 2691 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 2692 fprintf(fp, "ActionOnTimeout Reset\n"); 2693 break; 2694 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 2695 fprintf(fp, "ActionOnTimeout Abort\n"); 2696 break; 2697 } 2698 2699 fprintf(fp, "\n" 2700 "# Set how often the admin queue is polled for asynchronous events.\n" 2701 "# Units in microseconds.\n"); 2702 fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us); 2703 fprintf(fp, "IOPollRate %" PRIu64"\n", g_opts.nvme_ioq_poll_period_us); 2704 fprintf(fp, "\n" 2705 "# Disable handling of hotplug (runtime insert and remove) events,\n" 2706 "# users can set to Yes if want to enable it.\n" 2707 "# Default: No\n"); 2708 fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No"); 2709 fprintf(fp, "\n" 2710 "# Set how often the hotplug is processed for insert and remove events." 2711 "# Units in microseconds.\n"); 2712 fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us); 2713 if (g_nvme_hostnqn) { 2714 fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn); 2715 } 2716 fprintf(fp, "DelayCmdSubmit %s\n", g_opts.delay_cmd_submit ? "True" : "False"); 2717 2718 fprintf(fp, "\n"); 2719 } 2720 2721 static void 2722 nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2723 { 2724 /* nop */ 2725 } 2726 2727 static void 2728 nvme_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2729 { 2730 g_config_json_namespace_fn[ns->type](w, ns); 2731 } 2732 2733 static int 2734 bdev_nvme_config_json(struct spdk_json_write_ctx *w) 2735 { 2736 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2737 struct spdk_nvme_transport_id *trid; 2738 const char *action; 2739 uint32_t nsid; 2740 2741 if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) { 2742 action = "reset"; 2743 } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) { 2744 action = "abort"; 2745 } else { 2746 action = "none"; 2747 } 2748 2749 spdk_json_write_object_begin(w); 2750 2751 spdk_json_write_named_string(w, "method", "bdev_nvme_set_options"); 2752 2753 spdk_json_write_named_object_begin(w, "params"); 2754 spdk_json_write_named_string(w, "action_on_timeout", action); 2755 spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us); 2756 spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count); 2757 spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst); 2758 spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight); 2759 spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight); 2760 spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight); 2761 spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us); 2762 spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us); 2763 spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests); 2764 spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit); 2765 spdk_json_write_object_end(w); 2766 2767 spdk_json_write_object_end(w); 2768 2769 pthread_mutex_lock(&g_bdev_nvme_mutex); 2770 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2771 trid = &nvme_bdev_ctrlr->trid; 2772 2773 spdk_json_write_object_begin(w); 2774 2775 spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller"); 2776 2777 spdk_json_write_named_object_begin(w, "params"); 2778 spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name); 2779 nvme_bdev_dump_trid_json(trid, w); 2780 spdk_json_write_named_bool(w, "prchk_reftag", 2781 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0); 2782 spdk_json_write_named_bool(w, "prchk_guard", 2783 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0); 2784 2785 spdk_json_write_object_end(w); 2786 2787 spdk_json_write_object_end(w); 2788 2789 for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) { 2790 if (!nvme_bdev_ctrlr->namespaces[nsid]->populated) { 2791 continue; 2792 } 2793 2794 nvme_namespace_config_json(w, nvme_bdev_ctrlr->namespaces[nsid]); 2795 } 2796 } 2797 2798 /* Dump as last parameter to give all NVMe bdevs chance to be constructed 2799 * before enabling hotplug poller. 2800 */ 2801 spdk_json_write_object_begin(w); 2802 spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug"); 2803 2804 spdk_json_write_named_object_begin(w, "params"); 2805 spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us); 2806 spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled); 2807 spdk_json_write_object_end(w); 2808 2809 spdk_json_write_object_end(w); 2810 2811 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2812 return 0; 2813 } 2814 2815 struct spdk_nvme_ctrlr * 2816 bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) 2817 { 2818 if (!bdev || bdev->module != &nvme_if) { 2819 return NULL; 2820 } 2821 2822 return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_bdev_ctrlr->ctrlr; 2823 } 2824 2825 SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME) 2826