1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_nvme.h" 37 #include "bdev_ocssd.h" 38 39 #include "spdk/config.h" 40 #include "spdk/conf.h" 41 #include "spdk/endian.h" 42 #include "spdk/bdev.h" 43 #include "spdk/json.h" 44 #include "spdk/nvme.h" 45 #include "spdk/nvme_ocssd.h" 46 #include "spdk/thread.h" 47 #include "spdk/string.h" 48 #include "spdk/likely.h" 49 #include "spdk/util.h" 50 51 #include "spdk/bdev_module.h" 52 #include "spdk_internal/log.h" 53 54 #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true 55 56 static void bdev_nvme_get_spdk_running_config(FILE *fp); 57 static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); 58 59 struct nvme_bdev_io { 60 /** array of iovecs to transfer. */ 61 struct iovec *iovs; 62 63 /** Number of iovecs in iovs array. */ 64 int iovcnt; 65 66 /** Current iovec position. */ 67 int iovpos; 68 69 /** Offset in current iovec. */ 70 uint32_t iov_offset; 71 72 /** array of iovecs to transfer. */ 73 struct iovec *fused_iovs; 74 75 /** Number of iovecs in iovs array. */ 76 int fused_iovcnt; 77 78 /** Current iovec position. */ 79 int fused_iovpos; 80 81 /** Offset in current iovec. */ 82 uint32_t fused_iov_offset; 83 84 /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */ 85 struct spdk_nvme_cpl cpl; 86 87 /** Originating thread */ 88 struct spdk_thread *orig_thread; 89 90 /** Keeps track if first of fused commands was submitted */ 91 bool first_fused_submitted; 92 }; 93 94 struct nvme_probe_ctx { 95 size_t count; 96 struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; 97 struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS]; 98 const char *names[NVME_MAX_CONTROLLERS]; 99 uint32_t prchk_flags[NVME_MAX_CONTROLLERS]; 100 const char *hostnqn; 101 }; 102 103 struct nvme_probe_skip_entry { 104 struct spdk_nvme_transport_id trid; 105 TAILQ_ENTRY(nvme_probe_skip_entry) tailq; 106 }; 107 /* All the controllers deleted by users via RPC are skipped by hotplug monitor */ 108 static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER( 109 g_skipped_nvme_ctrlrs); 110 111 static struct spdk_bdev_nvme_opts g_opts = { 112 .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE, 113 .timeout_us = 0, 114 .retry_count = 4, 115 .arbitration_burst = 0, 116 .low_priority_weight = 0, 117 .medium_priority_weight = 0, 118 .high_priority_weight = 0, 119 .nvme_adminq_poll_period_us = 10000ULL, 120 .nvme_ioq_poll_period_us = 0, 121 .io_queue_requests = 0, 122 .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT, 123 }; 124 125 #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL 126 #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL 127 128 static int g_hot_insert_nvme_controller_index = 0; 129 static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT; 130 static bool g_nvme_hotplug_enabled = false; 131 static struct spdk_thread *g_bdev_nvme_init_thread; 132 static struct spdk_poller *g_hotplug_poller; 133 static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; 134 static char *g_nvme_hostnqn = NULL; 135 136 static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 137 struct nvme_async_probe_ctx *ctx); 138 static void nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx); 139 static int bdev_nvme_library_init(void); 140 static void bdev_nvme_library_fini(void); 141 static int bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 142 struct nvme_bdev_io *bio, 143 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 144 static int bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 145 struct nvme_bdev_io *bio, 146 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 147 static int bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 148 struct nvme_bdev_io *bio, 149 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 150 static int bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 151 struct nvme_bdev_io *bio, 152 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba); 153 static int bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 154 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 155 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba); 156 static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 157 struct nvme_bdev_io *bio, 158 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 159 static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 160 struct nvme_bdev_io *bio, 161 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); 162 static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 163 struct nvme_bdev_io *bio, 164 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len); 165 static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio); 166 static int bdev_nvme_abort(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 167 struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort); 168 169 typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 170 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 171 static void nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 172 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); 173 174 static populate_namespace_fn g_populate_namespace_fn[] = { 175 NULL, 176 nvme_ctrlr_populate_standard_namespace, 177 bdev_ocssd_populate_namespace, 178 }; 179 180 typedef void (*depopulate_namespace_fn)(struct nvme_bdev_ns *ns); 181 static void nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns); 182 183 static depopulate_namespace_fn g_depopulate_namespace_fn[] = { 184 NULL, 185 nvme_ctrlr_depopulate_standard_namespace, 186 bdev_ocssd_depopulate_namespace, 187 }; 188 189 typedef void (*config_json_namespace_fn)(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns); 190 static void nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, 191 struct nvme_bdev_ns *ns); 192 193 static config_json_namespace_fn g_config_json_namespace_fn[] = { 194 NULL, 195 nvme_ctrlr_config_json_standard_namespace, 196 bdev_ocssd_namespace_config_json, 197 }; 198 199 struct spdk_nvme_qpair * 200 bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) 201 { 202 struct nvme_io_channel *nvme_ch; 203 204 nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch); 205 206 return nvme_ch->qpair; 207 } 208 209 static int 210 bdev_nvme_get_ctx_size(void) 211 { 212 return sizeof(struct nvme_bdev_io); 213 } 214 215 static struct spdk_bdev_module nvme_if = { 216 .name = "nvme", 217 .async_fini = true, 218 .module_init = bdev_nvme_library_init, 219 .module_fini = bdev_nvme_library_fini, 220 .config_text = bdev_nvme_get_spdk_running_config, 221 .config_json = bdev_nvme_config_json, 222 .get_ctx_size = bdev_nvme_get_ctx_size, 223 224 }; 225 SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if) 226 227 static void 228 bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 229 { 230 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "qpar %p is disconnected, attempting reconnect.\n", qpair); 231 /* 232 * Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will 233 * reconnect a qpair and we will stop getting a callback for this one. 234 */ 235 spdk_nvme_ctrlr_reconnect_io_qpair(qpair); 236 } 237 238 static int 239 bdev_nvme_poll(void *arg) 240 { 241 struct nvme_bdev_poll_group *group = arg; 242 int64_t num_completions; 243 244 if (group->collect_spin_stat && group->start_ticks == 0) { 245 group->start_ticks = spdk_get_ticks(); 246 } 247 248 num_completions = spdk_nvme_poll_group_process_completions(group->group, 0, 249 bdev_nvme_disconnected_qpair_cb); 250 if (group->collect_spin_stat) { 251 if (num_completions > 0) { 252 if (group->end_ticks != 0) { 253 group->spin_ticks += (group->end_ticks - group->start_ticks); 254 group->end_ticks = 0; 255 } 256 group->start_ticks = 0; 257 } else { 258 group->end_ticks = spdk_get_ticks(); 259 } 260 } 261 262 return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 263 } 264 265 static int 266 bdev_nvme_poll_adminq(void *arg) 267 { 268 int32_t rc; 269 struct spdk_nvme_ctrlr *ctrlr = arg; 270 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 271 272 rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr); 273 274 if (rc < 0) { 275 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 276 assert(nvme_bdev_ctrlr != NULL); 277 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 278 } 279 280 return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; 281 } 282 283 static int 284 bdev_nvme_destruct(void *ctx) 285 { 286 struct nvme_bdev *nvme_disk = ctx; 287 288 nvme_bdev_detach_bdev_from_ns(nvme_disk); 289 290 free(nvme_disk->disk.name); 291 free(nvme_disk); 292 293 return 0; 294 } 295 296 static int 297 bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio, 298 uint64_t offset, uint64_t nbytes) 299 { 300 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); 301 302 return 0; 303 } 304 305 static void 306 _bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i) 307 { 308 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 309 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 310 struct spdk_bdev_io *bdev_io; 311 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; 312 313 /* A NULL ctx means success. */ 314 if (spdk_io_channel_iter_get_ctx(i) != NULL) { 315 status = SPDK_BDEV_IO_STATUS_FAILED; 316 } 317 318 while (!TAILQ_EMPTY(&nvme_ch->pending_resets)) { 319 bdev_io = TAILQ_FIRST(&nvme_ch->pending_resets); 320 TAILQ_REMOVE(&nvme_ch->pending_resets, bdev_io, module_link); 321 spdk_bdev_io_complete(bdev_io, status); 322 } 323 324 spdk_for_each_channel_continue(i, 0); 325 } 326 327 static void 328 _bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc) 329 { 330 /* we are using the for_each_channel cb_arg like a return code here. */ 331 /* If it's zero, we succeeded, otherwise, the reset failed. */ 332 void *cb_arg = NULL; 333 334 if (rc) { 335 cb_arg = (void *)0x1; 336 SPDK_ERRLOG("Resetting controller failed.\n"); 337 } else { 338 SPDK_NOTICELOG("Resetting controller successful.\n"); 339 } 340 341 pthread_mutex_lock(&g_bdev_nvme_mutex); 342 nvme_bdev_ctrlr->resetting = false; 343 pthread_mutex_unlock(&g_bdev_nvme_mutex); 344 /* Make sure we clear any pending resets before returning. */ 345 spdk_for_each_channel(nvme_bdev_ctrlr, 346 _bdev_nvme_complete_pending_resets, 347 cb_arg, NULL); 348 } 349 350 static void 351 _bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status) 352 { 353 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 354 void *ctx = spdk_io_channel_iter_get_ctx(i); 355 int rc = SPDK_BDEV_IO_STATUS_SUCCESS; 356 357 if (status) { 358 rc = SPDK_BDEV_IO_STATUS_FAILED; 359 } 360 if (ctx) { 361 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc); 362 } 363 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 364 } 365 366 static void 367 _bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i) 368 { 369 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 370 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 371 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); 372 struct spdk_nvme_io_qpair_opts opts; 373 374 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 375 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 376 opts.create_only = true; 377 378 nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 379 if (!nvme_ch->qpair) { 380 spdk_for_each_channel_continue(i, -1); 381 return; 382 } 383 384 assert(nvme_ch->group != NULL); 385 if (spdk_nvme_poll_group_add(nvme_ch->group->group, nvme_ch->qpair) != 0) { 386 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n"); 387 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 388 spdk_for_each_channel_continue(i, -1); 389 return; 390 } 391 392 if (spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair)) { 393 SPDK_ERRLOG("Unable to connect I/O qpair.\n"); 394 spdk_nvme_poll_group_remove(nvme_ch->group->group, nvme_ch->qpair); 395 spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 396 spdk_for_each_channel_continue(i, -1); 397 return; 398 } 399 400 spdk_for_each_channel_continue(i, 0); 401 } 402 403 static void 404 _bdev_nvme_reset(struct spdk_io_channel_iter *i, int status) 405 { 406 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i); 407 struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i); 408 int rc; 409 410 if (status) { 411 if (bio) { 412 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 413 } 414 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status); 415 return; 416 } 417 418 rc = spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr); 419 if (rc != 0) { 420 if (bio) { 421 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 422 } 423 _bdev_nvme_reset_complete(nvme_bdev_ctrlr, rc); 424 return; 425 } 426 427 /* Recreate all of the I/O queue pairs */ 428 spdk_for_each_channel(nvme_bdev_ctrlr, 429 _bdev_nvme_reset_create_qpair, 430 bio, 431 _bdev_nvme_reset_create_qpairs_done); 432 433 434 } 435 436 static void 437 _bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) 438 { 439 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 440 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 441 int rc; 442 443 rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); 444 if (!rc) { 445 nvme_ch->qpair = NULL; 446 } 447 448 spdk_for_each_channel_continue(i, rc); 449 } 450 451 static int 452 bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio) 453 { 454 struct spdk_io_channel *ch; 455 struct nvme_io_channel *nvme_ch; 456 457 pthread_mutex_lock(&g_bdev_nvme_mutex); 458 if (nvme_bdev_ctrlr->destruct) { 459 /* Don't bother resetting if the controller is in the process of being destructed. */ 460 if (bio) { 461 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); 462 } 463 pthread_mutex_unlock(&g_bdev_nvme_mutex); 464 return 0; 465 } 466 467 if (!nvme_bdev_ctrlr->resetting) { 468 nvme_bdev_ctrlr->resetting = true; 469 } else { 470 pthread_mutex_unlock(&g_bdev_nvme_mutex); 471 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n"); 472 /* 473 * The internal reset calls won't be queued. This is on purpose so that we don't 474 * interfere with the app framework reset strategy. i.e. we are deferring to the 475 * upper level. If they are in the middle of a reset, we won't try to schedule another one. 476 */ 477 if (bio) { 478 ch = spdk_get_io_channel(nvme_bdev_ctrlr); 479 assert(ch != NULL); 480 nvme_ch = spdk_io_channel_get_ctx(ch); 481 TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link); 482 spdk_put_io_channel(ch); 483 } 484 return 0; 485 } 486 487 pthread_mutex_unlock(&g_bdev_nvme_mutex); 488 /* First, delete all NVMe I/O queue pairs. */ 489 spdk_for_each_channel(nvme_bdev_ctrlr, 490 _bdev_nvme_reset_destroy_qpair, 491 bio, 492 _bdev_nvme_reset); 493 494 return 0; 495 } 496 497 static int 498 bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 499 struct nvme_bdev_io *bio, 500 uint64_t offset_blocks, 501 uint64_t num_blocks); 502 503 static void 504 bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 505 bool success) 506 { 507 int ret; 508 509 if (!success) { 510 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 511 return; 512 } 513 514 ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, 515 ch, 516 (struct nvme_bdev_io *)bdev_io->driver_ctx, 517 bdev_io->u.bdev.iovs, 518 bdev_io->u.bdev.iovcnt, 519 bdev_io->u.bdev.md_buf, 520 bdev_io->u.bdev.num_blocks, 521 bdev_io->u.bdev.offset_blocks); 522 523 if (spdk_likely(ret == 0)) { 524 return; 525 } else if (ret == -ENOMEM) { 526 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 527 } else { 528 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 529 } 530 } 531 532 static int 533 _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 534 { 535 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 536 struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt; 537 struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx; 538 struct nvme_bdev_io *nbdev_io_to_abort; 539 540 if (nvme_ch->qpair == NULL) { 541 /* The device is currently resetting */ 542 return -1; 543 } 544 545 switch (bdev_io->type) { 546 case SPDK_BDEV_IO_TYPE_READ: 547 spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb, 548 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 549 return 0; 550 551 case SPDK_BDEV_IO_TYPE_WRITE: 552 return bdev_nvme_writev(nbdev, 553 ch, 554 nbdev_io, 555 bdev_io->u.bdev.iovs, 556 bdev_io->u.bdev.iovcnt, 557 bdev_io->u.bdev.md_buf, 558 bdev_io->u.bdev.num_blocks, 559 bdev_io->u.bdev.offset_blocks); 560 561 case SPDK_BDEV_IO_TYPE_COMPARE: 562 return bdev_nvme_comparev(nbdev, 563 ch, 564 nbdev_io, 565 bdev_io->u.bdev.iovs, 566 bdev_io->u.bdev.iovcnt, 567 bdev_io->u.bdev.md_buf, 568 bdev_io->u.bdev.num_blocks, 569 bdev_io->u.bdev.offset_blocks); 570 571 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 572 return bdev_nvme_comparev_and_writev(nbdev, 573 ch, 574 nbdev_io, 575 bdev_io->u.bdev.iovs, 576 bdev_io->u.bdev.iovcnt, 577 bdev_io->u.bdev.fused_iovs, 578 bdev_io->u.bdev.fused_iovcnt, 579 bdev_io->u.bdev.md_buf, 580 bdev_io->u.bdev.num_blocks, 581 bdev_io->u.bdev.offset_blocks); 582 583 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 584 return bdev_nvme_unmap(nbdev, 585 ch, 586 nbdev_io, 587 bdev_io->u.bdev.offset_blocks, 588 bdev_io->u.bdev.num_blocks); 589 590 case SPDK_BDEV_IO_TYPE_UNMAP: 591 return bdev_nvme_unmap(nbdev, 592 ch, 593 nbdev_io, 594 bdev_io->u.bdev.offset_blocks, 595 bdev_io->u.bdev.num_blocks); 596 597 case SPDK_BDEV_IO_TYPE_RESET: 598 return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io); 599 600 case SPDK_BDEV_IO_TYPE_FLUSH: 601 return bdev_nvme_flush(nbdev, 602 nbdev_io, 603 bdev_io->u.bdev.offset_blocks, 604 bdev_io->u.bdev.num_blocks); 605 606 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 607 return bdev_nvme_admin_passthru(nbdev, 608 ch, 609 nbdev_io, 610 &bdev_io->u.nvme_passthru.cmd, 611 bdev_io->u.nvme_passthru.buf, 612 bdev_io->u.nvme_passthru.nbytes); 613 614 case SPDK_BDEV_IO_TYPE_NVME_IO: 615 return bdev_nvme_io_passthru(nbdev, 616 ch, 617 nbdev_io, 618 &bdev_io->u.nvme_passthru.cmd, 619 bdev_io->u.nvme_passthru.buf, 620 bdev_io->u.nvme_passthru.nbytes); 621 622 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 623 return bdev_nvme_io_passthru_md(nbdev, 624 ch, 625 nbdev_io, 626 &bdev_io->u.nvme_passthru.cmd, 627 bdev_io->u.nvme_passthru.buf, 628 bdev_io->u.nvme_passthru.nbytes, 629 bdev_io->u.nvme_passthru.md_buf, 630 bdev_io->u.nvme_passthru.md_len); 631 632 case SPDK_BDEV_IO_TYPE_ABORT: 633 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx; 634 return bdev_nvme_abort(nbdev, 635 ch, 636 nbdev_io, 637 nbdev_io_to_abort); 638 639 default: 640 return -EINVAL; 641 } 642 return 0; 643 } 644 645 static void 646 bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 647 { 648 int rc = _bdev_nvme_submit_request(ch, bdev_io); 649 650 if (spdk_unlikely(rc != 0)) { 651 if (rc == -ENOMEM) { 652 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 653 } else { 654 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 655 } 656 } 657 } 658 659 static bool 660 bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 661 { 662 struct nvme_bdev *nbdev = ctx; 663 const struct spdk_nvme_ctrlr_data *cdata; 664 665 switch (io_type) { 666 case SPDK_BDEV_IO_TYPE_READ: 667 case SPDK_BDEV_IO_TYPE_WRITE: 668 case SPDK_BDEV_IO_TYPE_RESET: 669 case SPDK_BDEV_IO_TYPE_FLUSH: 670 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 671 case SPDK_BDEV_IO_TYPE_NVME_IO: 672 case SPDK_BDEV_IO_TYPE_ABORT: 673 return true; 674 675 case SPDK_BDEV_IO_TYPE_COMPARE: 676 return spdk_nvme_ns_supports_compare(nbdev->nvme_ns->ns); 677 678 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 679 return spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns) ? true : false; 680 681 case SPDK_BDEV_IO_TYPE_UNMAP: 682 cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr); 683 return cdata->oncs.dsm; 684 685 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 686 cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr); 687 /* 688 * If an NVMe controller guarantees reading unallocated blocks returns zero, 689 * we can implement WRITE_ZEROES as an NVMe deallocate command. 690 */ 691 if (cdata->oncs.dsm && 692 spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->nvme_ns->ns) == 693 SPDK_NVME_DEALLOC_READ_00) { 694 return true; 695 } 696 /* 697 * The NVMe controller write_zeroes function is currently not used by our driver. 698 * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail. 699 * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read. 700 */ 701 return false; 702 703 case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE: 704 if (spdk_nvme_ctrlr_get_flags(nbdev->nvme_bdev_ctrlr->ctrlr) & 705 SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) { 706 return true; 707 } 708 return false; 709 710 default: 711 return false; 712 } 713 } 714 715 static int 716 bdev_nvme_create_cb(void *io_device, void *ctx_buf) 717 { 718 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 719 struct nvme_io_channel *ch = ctx_buf; 720 struct spdk_nvme_io_qpair_opts opts; 721 struct spdk_io_channel *pg_ch = NULL; 722 int rc; 723 724 spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 725 opts.delay_cmd_submit = g_opts.delay_cmd_submit; 726 opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests); 727 opts.create_only = true; 728 g_opts.io_queue_requests = opts.io_queue_requests; 729 730 ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts)); 731 732 if (ch->qpair == NULL) { 733 return -1; 734 } 735 736 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 737 if (bdev_ocssd_create_io_channel(ch)) { 738 goto err; 739 } 740 } 741 742 pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs); 743 if (!pg_ch) { 744 goto err; 745 } 746 747 ch->group = spdk_io_channel_get_ctx(pg_ch); 748 if (spdk_nvme_poll_group_add(ch->group->group, ch->qpair) != 0) { 749 goto err; 750 } 751 752 rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, ch->qpair); 753 if (rc) { 754 spdk_nvme_poll_group_remove(ch->group->group, ch->qpair); 755 goto err; 756 } 757 758 #ifdef SPDK_CONFIG_VTUNE 759 ch->group->collect_spin_stat = true; 760 #else 761 ch->group->collect_spin_stat = false; 762 #endif 763 764 TAILQ_INIT(&ch->pending_resets); 765 return 0; 766 767 err: 768 if (pg_ch) { 769 spdk_put_io_channel(pg_ch); 770 } 771 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 772 return -1; 773 } 774 775 static void 776 bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) 777 { 778 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device; 779 struct nvme_io_channel *ch = ctx_buf; 780 struct nvme_bdev_poll_group *group; 781 782 group = ch->group; 783 assert(group != NULL); 784 785 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 786 bdev_ocssd_destroy_io_channel(ch); 787 } 788 789 if (ch->qpair != NULL) { 790 spdk_nvme_poll_group_remove(group->group, ch->qpair); 791 } 792 spdk_put_io_channel(spdk_io_channel_from_ctx(group)); 793 794 spdk_nvme_ctrlr_free_io_qpair(ch->qpair); 795 } 796 797 static int 798 bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf) 799 { 800 struct nvme_bdev_poll_group *group = ctx_buf; 801 802 group->group = spdk_nvme_poll_group_create(group); 803 if (group->group == NULL) { 804 return -1; 805 } 806 807 group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); 808 809 if (group->poller == NULL) { 810 spdk_nvme_poll_group_destroy(group->group); 811 return -1; 812 } 813 814 return 0; 815 } 816 817 static void 818 bdev_nvme_poll_group_destroy_cb(void *io_device, void *ctx_buf) 819 { 820 struct nvme_bdev_poll_group *group = ctx_buf; 821 822 spdk_poller_unregister(&group->poller); 823 if (spdk_nvme_poll_group_destroy(group->group)) { 824 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module."); 825 assert(false); 826 } 827 } 828 829 static struct spdk_io_channel * 830 bdev_nvme_get_io_channel(void *ctx) 831 { 832 struct nvme_bdev *nvme_bdev = ctx; 833 834 return spdk_get_io_channel(nvme_bdev->nvme_bdev_ctrlr); 835 } 836 837 static int 838 bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 839 { 840 struct nvme_bdev *nvme_bdev = ctx; 841 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr; 842 const struct spdk_nvme_ctrlr_data *cdata; 843 struct spdk_nvme_ns *ns; 844 union spdk_nvme_vs_register vs; 845 union spdk_nvme_csts_register csts; 846 char buf[128]; 847 848 cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 849 vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 850 csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_bdev_ctrlr->ctrlr); 851 ns = nvme_bdev->nvme_ns->ns; 852 853 spdk_json_write_named_object_begin(w, "nvme"); 854 855 if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 856 spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->trid->traddr); 857 } 858 859 spdk_json_write_named_object_begin(w, "trid"); 860 861 nvme_bdev_dump_trid_json(nvme_bdev_ctrlr->trid, w); 862 863 spdk_json_write_object_end(w); 864 865 #ifdef SPDK_CONFIG_NVME_CUSE 866 size_t cuse_name_size = 128; 867 char cuse_name[cuse_name_size]; 868 869 int rc = spdk_nvme_cuse_get_ns_name(nvme_bdev->nvme_bdev_ctrlr->ctrlr, spdk_nvme_ns_get_id(ns), 870 cuse_name, &cuse_name_size); 871 if (rc == 0) { 872 spdk_json_write_named_string(w, "cuse_device", cuse_name); 873 } 874 #endif 875 876 spdk_json_write_named_object_begin(w, "ctrlr_data"); 877 878 spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid); 879 880 snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn); 881 spdk_str_trim(buf); 882 spdk_json_write_named_string(w, "model_number", buf); 883 884 snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn); 885 spdk_str_trim(buf); 886 spdk_json_write_named_string(w, "serial_number", buf); 887 888 snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr); 889 spdk_str_trim(buf); 890 spdk_json_write_named_string(w, "firmware_revision", buf); 891 892 spdk_json_write_named_object_begin(w, "oacs"); 893 894 spdk_json_write_named_uint32(w, "security", cdata->oacs.security); 895 spdk_json_write_named_uint32(w, "format", cdata->oacs.format); 896 spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware); 897 spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage); 898 899 spdk_json_write_object_end(w); 900 901 spdk_json_write_object_end(w); 902 903 spdk_json_write_named_object_begin(w, "vs"); 904 905 spdk_json_write_name(w, "nvme_version"); 906 if (vs.bits.ter) { 907 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter); 908 } else { 909 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr); 910 } 911 912 spdk_json_write_object_end(w); 913 914 spdk_json_write_named_object_begin(w, "csts"); 915 916 spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy); 917 spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs); 918 919 spdk_json_write_object_end(w); 920 921 spdk_json_write_named_object_begin(w, "ns_data"); 922 923 spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns)); 924 925 spdk_json_write_object_end(w); 926 927 if (cdata->oacs.security) { 928 spdk_json_write_named_object_begin(w, "security"); 929 930 spdk_json_write_named_bool(w, "opal", nvme_bdev_ctrlr->opal_dev ? true : false); 931 932 spdk_json_write_object_end(w); 933 } 934 935 spdk_json_write_object_end(w); 936 937 return 0; 938 } 939 940 static void 941 bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 942 { 943 /* No config per bdev needed */ 944 } 945 946 static uint64_t 947 bdev_nvme_get_spin_time(struct spdk_io_channel *ch) 948 { 949 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 950 struct nvme_bdev_poll_group *group = nvme_ch->group; 951 uint64_t spin_time; 952 953 if (!group || !group->collect_spin_stat) { 954 return 0; 955 } 956 957 if (group->end_ticks != 0) { 958 group->spin_ticks += (group->end_ticks - group->start_ticks); 959 group->end_ticks = 0; 960 } 961 962 spin_time = (group->spin_ticks * 1000000ULL) / spdk_get_ticks_hz(); 963 group->start_ticks = 0; 964 group->spin_ticks = 0; 965 966 return spin_time; 967 } 968 969 static const struct spdk_bdev_fn_table nvmelib_fn_table = { 970 .destruct = bdev_nvme_destruct, 971 .submit_request = bdev_nvme_submit_request, 972 .io_type_supported = bdev_nvme_io_type_supported, 973 .get_io_channel = bdev_nvme_get_io_channel, 974 .dump_info_json = bdev_nvme_dump_info_json, 975 .write_config_json = bdev_nvme_write_config_json, 976 .get_spin_time = bdev_nvme_get_spin_time, 977 }; 978 979 static void 980 nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 981 struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx) 982 { 983 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 984 struct nvme_bdev *bdev; 985 struct spdk_nvme_ns *ns; 986 const struct spdk_uuid *uuid; 987 const struct spdk_nvme_ctrlr_data *cdata; 988 const struct spdk_nvme_ns_data *nsdata; 989 int rc; 990 991 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 992 993 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id); 994 if (!ns) { 995 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nvme_ns->id); 996 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -EINVAL); 997 return; 998 } 999 1000 bdev = calloc(1, sizeof(*bdev)); 1001 if (!bdev) { 1002 SPDK_ERRLOG("bdev calloc() failed\n"); 1003 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 1004 return; 1005 } 1006 1007 bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr; 1008 nvme_ns->ns = ns; 1009 bdev->nvme_ns = nvme_ns; 1010 1011 bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns)); 1012 if (!bdev->disk.name) { 1013 free(bdev); 1014 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM); 1015 return; 1016 } 1017 bdev->disk.product_name = "NVMe disk"; 1018 1019 bdev->disk.write_cache = 0; 1020 if (cdata->vwc.present) { 1021 /* Enable if the Volatile Write Cache exists */ 1022 bdev->disk.write_cache = 1; 1023 } 1024 bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns); 1025 bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns); 1026 bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); 1027 1028 uuid = spdk_nvme_ns_get_uuid(ns); 1029 if (uuid != NULL) { 1030 bdev->disk.uuid = *uuid; 1031 } 1032 1033 nsdata = spdk_nvme_ns_get_data(ns); 1034 1035 bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns); 1036 if (bdev->disk.md_len != 0) { 1037 bdev->disk.md_interleave = nsdata->flbas.extended; 1038 bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns); 1039 if (bdev->disk.dif_type != SPDK_DIF_DISABLE) { 1040 bdev->disk.dif_is_head_of_md = nsdata->dps.md_start; 1041 bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags; 1042 } 1043 } 1044 1045 if (!bdev_nvme_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) { 1046 bdev->disk.acwu = 0; 1047 } else if (nsdata->nsfeat.ns_atomic_write_unit) { 1048 bdev->disk.acwu = nsdata->nacwu; 1049 } else { 1050 bdev->disk.acwu = cdata->acwu; 1051 } 1052 1053 bdev->disk.ctxt = bdev; 1054 bdev->disk.fn_table = &nvmelib_fn_table; 1055 bdev->disk.module = &nvme_if; 1056 rc = spdk_bdev_register(&bdev->disk); 1057 if (rc) { 1058 free(bdev->disk.name); 1059 free(bdev); 1060 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, rc); 1061 return; 1062 } 1063 1064 nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev); 1065 nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0); 1066 } 1067 1068 static bool 1069 hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1070 struct spdk_nvme_ctrlr_opts *opts) 1071 { 1072 struct nvme_probe_skip_entry *entry; 1073 1074 TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) { 1075 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1076 return false; 1077 } 1078 } 1079 1080 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1081 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1082 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1083 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1084 1085 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr); 1086 1087 return true; 1088 } 1089 1090 static bool 1091 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1092 struct spdk_nvme_ctrlr_opts *opts) 1093 { 1094 struct nvme_probe_ctx *ctx = cb_ctx; 1095 1096 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr); 1097 1098 if (nvme_bdev_ctrlr_get(trid)) { 1099 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", 1100 trid->traddr); 1101 return false; 1102 } 1103 1104 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1105 bool claim_device = false; 1106 size_t i; 1107 1108 for (i = 0; i < ctx->count; i++) { 1109 if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { 1110 claim_device = true; 1111 break; 1112 } 1113 } 1114 1115 if (!claim_device) { 1116 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr); 1117 return false; 1118 } 1119 } 1120 1121 if (ctx->hostnqn) { 1122 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); 1123 } 1124 1125 opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; 1126 opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; 1127 opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; 1128 opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; 1129 1130 return true; 1131 } 1132 1133 static void 1134 nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) 1135 { 1136 struct spdk_nvme_ctrlr *ctrlr = ctx; 1137 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1138 1139 if (spdk_nvme_cpl_is_error(cpl)) { 1140 SPDK_WARNLOG("Abort failed. Resetting controller.\n"); 1141 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1142 assert(nvme_bdev_ctrlr != NULL); 1143 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1144 } 1145 } 1146 1147 static void 1148 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, 1149 struct spdk_nvme_qpair *qpair, uint16_t cid) 1150 { 1151 int rc; 1152 union spdk_nvme_csts_register csts; 1153 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1154 1155 SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); 1156 1157 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); 1158 if (csts.bits.cfs) { 1159 SPDK_ERRLOG("Controller Fatal Status, reset required\n"); 1160 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1161 assert(nvme_bdev_ctrlr != NULL); 1162 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1163 return; 1164 } 1165 1166 switch (g_opts.action_on_timeout) { 1167 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 1168 if (qpair) { 1169 rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, 1170 nvme_abort_cpl, ctrlr); 1171 if (rc == 0) { 1172 return; 1173 } 1174 1175 SPDK_ERRLOG("Unable to send abort. Resetting.\n"); 1176 } 1177 1178 /* FALLTHROUGH */ 1179 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 1180 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); 1181 assert(nvme_bdev_ctrlr != NULL); 1182 bdev_nvme_reset(nvme_bdev_ctrlr, NULL); 1183 break; 1184 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 1185 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n"); 1186 break; 1187 default: 1188 SPDK_ERRLOG("An invalid timeout action value is found.\n"); 1189 break; 1190 } 1191 } 1192 1193 void 1194 nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) 1195 { 1196 pthread_mutex_lock(&g_bdev_nvme_mutex); 1197 nvme_bdev_ctrlr->ref--; 1198 1199 if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) { 1200 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1201 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1202 return; 1203 } 1204 1205 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1206 } 1207 1208 static void 1209 nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns) 1210 { 1211 struct nvme_bdev *bdev, *tmp; 1212 1213 TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { 1214 spdk_bdev_unregister(&bdev->disk, NULL, NULL); 1215 } 1216 1217 ns->populated = false; 1218 1219 nvme_ctrlr_depopulate_namespace_done(ns->ctrlr); 1220 } 1221 1222 static void nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, 1223 struct nvme_async_probe_ctx *ctx) 1224 { 1225 g_populate_namespace_fn[ns->type](ctrlr, ns, ctx); 1226 } 1227 1228 static void nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns) 1229 { 1230 g_depopulate_namespace_fn[ns->type](ns); 1231 } 1232 1233 void 1234 nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx, 1235 struct nvme_bdev_ns *ns, int rc) 1236 { 1237 if (rc == 0) { 1238 ns->populated = true; 1239 pthread_mutex_lock(&g_bdev_nvme_mutex); 1240 ns->ctrlr->ref++; 1241 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1242 } else { 1243 memset(ns, 0, sizeof(*ns)); 1244 } 1245 1246 if (ctx) { 1247 ctx->populates_in_progress--; 1248 if (ctx->populates_in_progress == 0) { 1249 nvme_ctrlr_populate_namespaces_done(ctx); 1250 } 1251 } 1252 } 1253 1254 static void 1255 nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, 1256 struct nvme_async_probe_ctx *ctx) 1257 { 1258 struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; 1259 struct nvme_bdev_ns *ns; 1260 struct spdk_nvme_ns *nvme_ns; 1261 struct nvme_bdev *bdev; 1262 uint32_t i; 1263 int rc; 1264 uint64_t num_sectors; 1265 bool ns_is_active; 1266 1267 if (ctx) { 1268 /* Initialize this count to 1 to handle the populate functions 1269 * calling nvme_ctrlr_populate_namespace_done() immediately. 1270 */ 1271 ctx->populates_in_progress = 1; 1272 } 1273 1274 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1275 uint32_t nsid = i + 1; 1276 1277 ns = nvme_bdev_ctrlr->namespaces[i]; 1278 ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid); 1279 1280 if (ns->populated && ns_is_active && ns->type == NVME_BDEV_NS_STANDARD) { 1281 /* NS is still there but attributes may have changed */ 1282 nvme_ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 1283 num_sectors = spdk_nvme_ns_get_num_sectors(nvme_ns); 1284 bdev = TAILQ_FIRST(&ns->bdevs); 1285 if (bdev->disk.blockcnt != num_sectors) { 1286 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %lu, new size %lu\n", 1287 nsid, 1288 bdev->disk.name, 1289 bdev->disk.blockcnt, 1290 num_sectors); 1291 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors); 1292 if (rc != 0) { 1293 SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n", 1294 bdev->disk.name, rc); 1295 } 1296 } 1297 } 1298 1299 if (!ns->populated && ns_is_active) { 1300 ns->id = nsid; 1301 ns->ctrlr = nvme_bdev_ctrlr; 1302 if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { 1303 ns->type = NVME_BDEV_NS_OCSSD; 1304 } else { 1305 ns->type = NVME_BDEV_NS_STANDARD; 1306 } 1307 1308 TAILQ_INIT(&ns->bdevs); 1309 1310 if (ctx) { 1311 ctx->populates_in_progress++; 1312 } 1313 nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx); 1314 } 1315 1316 if (ns->populated && !ns_is_active) { 1317 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1318 } 1319 } 1320 1321 if (ctx) { 1322 /* Decrement this count now that the loop is over to account 1323 * for the one we started with. If the count is then 0, we 1324 * know any populate_namespace functions completed immediately, 1325 * so we'll kick the callback here. 1326 */ 1327 ctx->populates_in_progress--; 1328 if (ctx->populates_in_progress == 0) { 1329 nvme_ctrlr_populate_namespaces_done(ctx); 1330 } 1331 } 1332 1333 } 1334 1335 static void 1336 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) 1337 { 1338 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg; 1339 union spdk_nvme_async_event_completion event; 1340 1341 if (spdk_nvme_cpl_is_error(cpl)) { 1342 SPDK_WARNLOG("AER request execute failed"); 1343 return; 1344 } 1345 1346 event.raw = cpl->cdw0; 1347 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && 1348 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { 1349 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1350 } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR) && 1351 (event.bits.log_page_identifier == SPDK_OCSSD_LOG_CHUNK_NOTIFICATION) && 1352 spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1353 bdev_ocssd_handle_chunk_notification(nvme_bdev_ctrlr); 1354 } 1355 } 1356 1357 static int 1358 create_ctrlr(struct spdk_nvme_ctrlr *ctrlr, 1359 const char *name, 1360 const struct spdk_nvme_transport_id *trid, 1361 uint32_t prchk_flags) 1362 { 1363 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1364 uint32_t i; 1365 int rc; 1366 1367 nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr)); 1368 if (nvme_bdev_ctrlr == NULL) { 1369 SPDK_ERRLOG("Failed to allocate device struct\n"); 1370 return -ENOMEM; 1371 } 1372 1373 nvme_bdev_ctrlr->trid = calloc(1, sizeof(*nvme_bdev_ctrlr->trid)); 1374 if (nvme_bdev_ctrlr->trid == NULL) { 1375 SPDK_ERRLOG("Failed to allocate device trid struct\n"); 1376 free(nvme_bdev_ctrlr); 1377 return -ENOMEM; 1378 } 1379 1380 nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 1381 nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *)); 1382 if (!nvme_bdev_ctrlr->namespaces) { 1383 SPDK_ERRLOG("Failed to allocate block namespaces pointer\n"); 1384 free(nvme_bdev_ctrlr->trid); 1385 free(nvme_bdev_ctrlr); 1386 return -ENOMEM; 1387 } 1388 1389 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1390 nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns)); 1391 if (nvme_bdev_ctrlr->namespaces[i] == NULL) { 1392 SPDK_ERRLOG("Failed to allocate block namespace struct\n"); 1393 for (; i > 0; i--) { 1394 free(nvme_bdev_ctrlr->namespaces[i - 1]); 1395 } 1396 free(nvme_bdev_ctrlr->namespaces); 1397 free(nvme_bdev_ctrlr->trid); 1398 free(nvme_bdev_ctrlr); 1399 return -ENOMEM; 1400 } 1401 } 1402 1403 nvme_bdev_ctrlr->thread = spdk_get_thread(); 1404 nvme_bdev_ctrlr->adminq_timer_poller = NULL; 1405 nvme_bdev_ctrlr->ctrlr = ctrlr; 1406 nvme_bdev_ctrlr->ref = 0; 1407 *nvme_bdev_ctrlr->trid = *trid; 1408 nvme_bdev_ctrlr->name = strdup(name); 1409 if (nvme_bdev_ctrlr->name == NULL) { 1410 free(nvme_bdev_ctrlr->namespaces); 1411 free(nvme_bdev_ctrlr->trid); 1412 free(nvme_bdev_ctrlr); 1413 return -ENOMEM; 1414 } 1415 1416 if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) { 1417 rc = bdev_ocssd_init_ctrlr(nvme_bdev_ctrlr); 1418 if (spdk_unlikely(rc != 0)) { 1419 SPDK_ERRLOG("Unable to initialize OCSSD controller\n"); 1420 free(nvme_bdev_ctrlr->name); 1421 free(nvme_bdev_ctrlr->namespaces); 1422 free(nvme_bdev_ctrlr->trid); 1423 free(nvme_bdev_ctrlr); 1424 return rc; 1425 } 1426 } 1427 1428 nvme_bdev_ctrlr->prchk_flags = prchk_flags; 1429 1430 spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb, 1431 sizeof(struct nvme_io_channel), 1432 name); 1433 1434 nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, ctrlr, 1435 g_opts.nvme_adminq_poll_period_us); 1436 1437 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq); 1438 1439 if (g_opts.timeout_us > 0) { 1440 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, 1441 timeout_cb, NULL); 1442 } 1443 1444 spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr); 1445 1446 if (spdk_nvme_ctrlr_get_flags(nvme_bdev_ctrlr->ctrlr) & 1447 SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { 1448 nvme_bdev_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_bdev_ctrlr->ctrlr); 1449 if (nvme_bdev_ctrlr->opal_dev == NULL) { 1450 SPDK_ERRLOG("Failed to initialize Opal\n"); 1451 } 1452 } 1453 return 0; 1454 } 1455 1456 static void 1457 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1458 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1459 { 1460 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1461 struct nvme_probe_ctx *ctx = cb_ctx; 1462 char *name = NULL; 1463 uint32_t prchk_flags = 0; 1464 size_t i; 1465 1466 if (ctx) { 1467 for (i = 0; i < ctx->count; i++) { 1468 if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { 1469 prchk_flags = ctx->prchk_flags[i]; 1470 name = strdup(ctx->names[i]); 1471 break; 1472 } 1473 } 1474 } else { 1475 name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); 1476 } 1477 if (!name) { 1478 SPDK_ERRLOG("Failed to assign name to NVMe device\n"); 1479 return; 1480 } 1481 1482 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name); 1483 1484 create_ctrlr(ctrlr, name, trid, prchk_flags); 1485 1486 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); 1487 if (!nvme_bdev_ctrlr) { 1488 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 1489 free(name); 1490 return; 1491 } 1492 1493 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 1494 1495 free(name); 1496 } 1497 1498 static void 1499 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) 1500 { 1501 uint32_t i; 1502 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1503 struct nvme_bdev_ns *ns; 1504 1505 pthread_mutex_lock(&g_bdev_nvme_mutex); 1506 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 1507 if (nvme_bdev_ctrlr->ctrlr == ctrlr) { 1508 /* The controller's destruction was already started */ 1509 if (nvme_bdev_ctrlr->destruct) { 1510 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1511 return; 1512 } 1513 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1514 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1515 uint32_t nsid = i + 1; 1516 1517 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1518 if (ns->populated) { 1519 assert(ns->id == nsid); 1520 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 1521 } 1522 } 1523 1524 pthread_mutex_lock(&g_bdev_nvme_mutex); 1525 nvme_bdev_ctrlr->destruct = true; 1526 if (nvme_bdev_ctrlr->ref == 0) { 1527 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1528 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 1529 } else { 1530 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1531 } 1532 return; 1533 } 1534 } 1535 pthread_mutex_unlock(&g_bdev_nvme_mutex); 1536 } 1537 1538 static int 1539 bdev_nvme_hotplug(void *arg) 1540 { 1541 struct spdk_nvme_transport_id trid_pcie; 1542 int done; 1543 1544 if (!g_hotplug_probe_ctx) { 1545 memset(&trid_pcie, 0, sizeof(trid_pcie)); 1546 spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE); 1547 1548 g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL, 1549 hotplug_probe_cb, 1550 attach_cb, remove_cb); 1551 if (!g_hotplug_probe_ctx) { 1552 return SPDK_POLLER_BUSY; 1553 } 1554 } 1555 1556 done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx); 1557 if (done != -EAGAIN) { 1558 g_hotplug_probe_ctx = NULL; 1559 } 1560 1561 return SPDK_POLLER_BUSY; 1562 } 1563 1564 void 1565 bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) 1566 { 1567 *opts = g_opts; 1568 } 1569 1570 int 1571 bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) 1572 { 1573 if (g_bdev_nvme_init_thread != NULL) { 1574 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 1575 return -EPERM; 1576 } 1577 } 1578 1579 g_opts = *opts; 1580 1581 return 0; 1582 } 1583 1584 struct set_nvme_hotplug_ctx { 1585 uint64_t period_us; 1586 bool enabled; 1587 spdk_msg_fn fn; 1588 void *fn_ctx; 1589 }; 1590 1591 static void 1592 set_nvme_hotplug_period_cb(void *_ctx) 1593 { 1594 struct set_nvme_hotplug_ctx *ctx = _ctx; 1595 1596 spdk_poller_unregister(&g_hotplug_poller); 1597 if (ctx->enabled) { 1598 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us); 1599 } 1600 1601 g_nvme_hotplug_poll_period_us = ctx->period_us; 1602 g_nvme_hotplug_enabled = ctx->enabled; 1603 if (ctx->fn) { 1604 ctx->fn(ctx->fn_ctx); 1605 } 1606 1607 free(ctx); 1608 } 1609 1610 int 1611 bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx) 1612 { 1613 struct set_nvme_hotplug_ctx *ctx; 1614 1615 if (enabled == true && !spdk_process_is_primary()) { 1616 return -EPERM; 1617 } 1618 1619 ctx = calloc(1, sizeof(*ctx)); 1620 if (ctx == NULL) { 1621 return -ENOMEM; 1622 } 1623 1624 period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; 1625 ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); 1626 ctx->enabled = enabled; 1627 ctx->fn = cb; 1628 ctx->fn_ctx = cb_ctx; 1629 1630 spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); 1631 return 0; 1632 } 1633 1634 static void 1635 populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc) 1636 { 1637 if (ctx->cb_fn) { 1638 ctx->cb_fn(ctx->cb_ctx, count, rc); 1639 } 1640 1641 free(ctx); 1642 } 1643 1644 static void 1645 nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx) 1646 { 1647 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1648 struct nvme_bdev_ns *ns; 1649 struct nvme_bdev *nvme_bdev, *tmp; 1650 uint32_t i, nsid; 1651 size_t j; 1652 1653 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid); 1654 assert(nvme_bdev_ctrlr != NULL); 1655 1656 /* 1657 * Report the new bdevs that were created in this call. 1658 * There can be more than one bdev per NVMe controller. 1659 */ 1660 j = 0; 1661 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 1662 nsid = i + 1; 1663 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 1664 if (!ns->populated) { 1665 continue; 1666 } 1667 assert(ns->id == nsid); 1668 TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { 1669 if (j < ctx->count) { 1670 ctx->names[j] = nvme_bdev->disk.name; 1671 j++; 1672 } else { 1673 SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n", 1674 ctx->count); 1675 populate_namespaces_cb(ctx, 0, -ERANGE); 1676 return; 1677 } 1678 } 1679 } 1680 1681 populate_namespaces_cb(ctx, j, 0); 1682 } 1683 1684 static void 1685 connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 1686 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 1687 { 1688 struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx; 1689 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1690 struct nvme_async_probe_ctx *ctx; 1691 int rc; 1692 1693 ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts); 1694 1695 spdk_poller_unregister(&ctx->poller); 1696 1697 rc = create_ctrlr(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags); 1698 if (rc) { 1699 SPDK_ERRLOG("Failed to create new device\n"); 1700 populate_namespaces_cb(ctx, 0, rc); 1701 return; 1702 } 1703 1704 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid); 1705 assert(nvme_bdev_ctrlr != NULL); 1706 1707 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, ctx); 1708 } 1709 1710 static int 1711 bdev_nvme_async_poll(void *arg) 1712 { 1713 struct nvme_async_probe_ctx *ctx = arg; 1714 int rc; 1715 1716 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx); 1717 if (spdk_unlikely(rc != -EAGAIN && rc != 0)) { 1718 spdk_poller_unregister(&ctx->poller); 1719 free(ctx); 1720 } 1721 1722 return SPDK_POLLER_BUSY; 1723 } 1724 1725 int 1726 bdev_nvme_create(struct spdk_nvme_transport_id *trid, 1727 struct spdk_nvme_host_id *hostid, 1728 const char *base_name, 1729 const char **names, 1730 uint32_t count, 1731 const char *hostnqn, 1732 uint32_t prchk_flags, 1733 spdk_bdev_create_nvme_fn cb_fn, 1734 void *cb_ctx) 1735 { 1736 struct nvme_probe_skip_entry *entry, *tmp; 1737 struct nvme_async_probe_ctx *ctx; 1738 1739 if (nvme_bdev_ctrlr_get(trid) != NULL) { 1740 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr); 1741 return -EEXIST; 1742 } 1743 1744 if (nvme_bdev_ctrlr_get_by_name(base_name)) { 1745 SPDK_ERRLOG("A controller with the provided name (%s) already exists.\n", base_name); 1746 return -EEXIST; 1747 } 1748 1749 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1750 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) { 1751 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) { 1752 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 1753 free(entry); 1754 break; 1755 } 1756 } 1757 } 1758 1759 ctx = calloc(1, sizeof(*ctx)); 1760 if (!ctx) { 1761 return -ENOMEM; 1762 } 1763 ctx->base_name = base_name; 1764 ctx->names = names; 1765 ctx->count = count; 1766 ctx->cb_fn = cb_fn; 1767 ctx->cb_ctx = cb_ctx; 1768 ctx->prchk_flags = prchk_flags; 1769 ctx->trid = *trid; 1770 1771 spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts)); 1772 ctx->opts.transport_retry_count = g_opts.retry_count; 1773 1774 if (hostnqn) { 1775 snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn); 1776 } 1777 1778 if (hostid->hostaddr[0] != '\0') { 1779 snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr); 1780 } 1781 1782 if (hostid->hostsvcid[0] != '\0') { 1783 snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid); 1784 } 1785 1786 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb); 1787 if (ctx->probe_ctx == NULL) { 1788 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr); 1789 free(ctx); 1790 return -ENODEV; 1791 } 1792 ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000); 1793 1794 return 0; 1795 } 1796 1797 int 1798 bdev_nvme_delete(const char *name) 1799 { 1800 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; 1801 struct nvme_probe_skip_entry *entry; 1802 1803 if (name == NULL) { 1804 return -EINVAL; 1805 } 1806 1807 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name); 1808 if (nvme_bdev_ctrlr == NULL) { 1809 SPDK_ERRLOG("Failed to find NVMe controller\n"); 1810 return -ENODEV; 1811 } 1812 1813 if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 1814 entry = calloc(1, sizeof(*entry)); 1815 if (!entry) { 1816 return -ENOMEM; 1817 } 1818 entry->trid = *nvme_bdev_ctrlr->trid; 1819 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq); 1820 } 1821 1822 remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); 1823 return 0; 1824 } 1825 1826 static int 1827 bdev_nvme_library_init(void) 1828 { 1829 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 1830 struct spdk_conf_section *sp; 1831 const char *val; 1832 int rc = 0; 1833 int64_t intval = 0; 1834 size_t i; 1835 struct nvme_probe_ctx *probe_ctx = NULL; 1836 int retry_count; 1837 uint32_t local_nvme_num = 0; 1838 int64_t hotplug_period; 1839 bool hotplug_enabled = g_nvme_hotplug_enabled; 1840 1841 g_bdev_nvme_init_thread = spdk_get_thread(); 1842 1843 spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_poll_group_create_cb, 1844 bdev_nvme_poll_group_destroy_cb, 1845 sizeof(struct nvme_bdev_poll_group), "bdev_nvme_poll_groups"); 1846 1847 sp = spdk_conf_find_section(NULL, "Nvme"); 1848 if (sp == NULL) { 1849 goto end; 1850 } 1851 1852 probe_ctx = calloc(1, sizeof(*probe_ctx)); 1853 if (probe_ctx == NULL) { 1854 SPDK_ERRLOG("Failed to allocate probe_ctx\n"); 1855 rc = -1; 1856 goto end; 1857 } 1858 1859 retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); 1860 if (retry_count >= 0) { 1861 g_opts.retry_count = retry_count; 1862 } 1863 1864 val = spdk_conf_section_get_val(sp, "TimeoutUsec"); 1865 if (val != NULL) { 1866 intval = spdk_strtoll(val, 10); 1867 if (intval < 0) { 1868 SPDK_ERRLOG("Invalid TimeoutUsec value\n"); 1869 rc = -1; 1870 goto end; 1871 } 1872 } 1873 1874 g_opts.timeout_us = intval; 1875 1876 if (g_opts.timeout_us > 0) { 1877 val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); 1878 if (val != NULL) { 1879 if (!strcasecmp(val, "Reset")) { 1880 g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; 1881 } else if (!strcasecmp(val, "Abort")) { 1882 g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; 1883 } 1884 } 1885 } 1886 1887 intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); 1888 if (intval > 0) { 1889 g_opts.nvme_adminq_poll_period_us = intval; 1890 } 1891 1892 intval = spdk_conf_section_get_intval(sp, "IOPollRate"); 1893 if (intval > 0) { 1894 g_opts.nvme_ioq_poll_period_us = intval; 1895 } 1896 1897 if (spdk_process_is_primary()) { 1898 hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false); 1899 } 1900 1901 hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate"); 1902 if (hotplug_period < 0) { 1903 hotplug_period = 0; 1904 } 1905 1906 g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN"); 1907 probe_ctx->hostnqn = g_nvme_hostnqn; 1908 1909 g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit", 1910 SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT); 1911 1912 for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { 1913 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); 1914 if (val == NULL) { 1915 break; 1916 } 1917 1918 rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); 1919 if (rc < 0) { 1920 SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); 1921 rc = -1; 1922 goto end; 1923 } 1924 1925 rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val); 1926 if (rc < 0) { 1927 SPDK_ERRLOG("Unable to parse HostID: %s\n", val); 1928 rc = -1; 1929 goto end; 1930 } 1931 1932 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); 1933 if (val == NULL) { 1934 SPDK_ERRLOG("No name provided for TransportID\n"); 1935 rc = -1; 1936 goto end; 1937 } 1938 1939 probe_ctx->names[i] = val; 1940 1941 val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2); 1942 if (val != NULL) { 1943 rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val); 1944 if (rc < 0) { 1945 SPDK_ERRLOG("Unable to parse prchk: %s\n", val); 1946 rc = -1; 1947 goto end; 1948 } 1949 } 1950 1951 probe_ctx->count++; 1952 1953 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 1954 struct spdk_nvme_ctrlr *ctrlr; 1955 struct spdk_nvme_ctrlr_opts opts; 1956 1957 if (nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 1958 SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", 1959 probe_ctx->trids[i].traddr); 1960 rc = -1; 1961 goto end; 1962 } 1963 1964 if (probe_ctx->trids[i].subnqn[0] == '\0') { 1965 SPDK_ERRLOG("Need to provide subsystem nqn\n"); 1966 rc = -1; 1967 goto end; 1968 } 1969 1970 spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); 1971 opts.transport_retry_count = g_opts.retry_count; 1972 1973 if (probe_ctx->hostnqn != NULL) { 1974 snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn); 1975 } 1976 1977 if (probe_ctx->hostids[i].hostaddr[0] != '\0') { 1978 snprintf(opts.src_addr, sizeof(opts.src_addr), "%s", probe_ctx->hostids[i].hostaddr); 1979 } 1980 1981 if (probe_ctx->hostids[i].hostsvcid[0] != '\0') { 1982 snprintf(opts.src_svcid, sizeof(opts.src_svcid), "%s", probe_ctx->hostids[i].hostsvcid); 1983 } 1984 1985 ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts)); 1986 if (ctrlr == NULL) { 1987 SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n", 1988 probe_ctx->trids[i].traddr); 1989 rc = -1; 1990 goto end; 1991 } 1992 1993 rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i], 0); 1994 if (rc) { 1995 goto end; 1996 } 1997 1998 nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&probe_ctx->trids[i]); 1999 if (!nvme_bdev_ctrlr) { 2000 SPDK_ERRLOG("Failed to find new NVMe controller\n"); 2001 rc = -ENODEV; 2002 goto end; 2003 } 2004 2005 nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL); 2006 } else { 2007 local_nvme_num++; 2008 } 2009 } 2010 2011 if (local_nvme_num > 0) { 2012 /* used to probe local NVMe device */ 2013 if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) { 2014 rc = -1; 2015 goto end; 2016 } 2017 2018 for (i = 0; i < probe_ctx->count; i++) { 2019 if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { 2020 continue; 2021 } 2022 2023 if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { 2024 SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); 2025 SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); 2026 } 2027 } 2028 } 2029 2030 rc = bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL); 2031 if (rc) { 2032 SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc)); 2033 rc = -1; 2034 } 2035 end: 2036 free(probe_ctx); 2037 return rc; 2038 } 2039 2040 static void 2041 bdev_nvme_library_fini(void) 2042 { 2043 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp; 2044 struct nvme_probe_skip_entry *entry, *entry_tmp; 2045 struct nvme_bdev_ns *ns; 2046 uint32_t i; 2047 2048 spdk_poller_unregister(&g_hotplug_poller); 2049 free(g_hotplug_probe_ctx); 2050 2051 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) { 2052 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq); 2053 free(entry); 2054 } 2055 2056 pthread_mutex_lock(&g_bdev_nvme_mutex); 2057 TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) { 2058 if (nvme_bdev_ctrlr->destruct) { 2059 /* This controller's destruction was already started 2060 * before the application started shutting down 2061 */ 2062 continue; 2063 } 2064 2065 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2066 2067 for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { 2068 uint32_t nsid = i + 1; 2069 2070 ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; 2071 if (ns->populated) { 2072 assert(ns->id == nsid); 2073 nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); 2074 } 2075 } 2076 2077 pthread_mutex_lock(&g_bdev_nvme_mutex); 2078 nvme_bdev_ctrlr->destruct = true; 2079 2080 if (nvme_bdev_ctrlr->ref == 0) { 2081 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2082 nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr); 2083 pthread_mutex_lock(&g_bdev_nvme_mutex); 2084 } 2085 } 2086 2087 g_bdev_nvme_module_finish = true; 2088 if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) { 2089 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2090 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL); 2091 spdk_bdev_module_finish_done(); 2092 return; 2093 } 2094 2095 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2096 } 2097 2098 static void 2099 bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io) 2100 { 2101 struct spdk_bdev *bdev = bdev_io->bdev; 2102 struct spdk_dif_ctx dif_ctx; 2103 struct spdk_dif_error err_blk = {}; 2104 int rc; 2105 2106 rc = spdk_dif_ctx_init(&dif_ctx, 2107 bdev->blocklen, bdev->md_len, bdev->md_interleave, 2108 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 2109 bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0); 2110 if (rc != 0) { 2111 SPDK_ERRLOG("Initialization of DIF context failed\n"); 2112 return; 2113 } 2114 2115 if (bdev->md_interleave) { 2116 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 2117 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2118 } else { 2119 struct iovec md_iov = { 2120 .iov_base = bdev_io->u.bdev.md_buf, 2121 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 2122 }; 2123 2124 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 2125 &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 2126 } 2127 2128 if (rc != 0) { 2129 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", 2130 err_blk.err_type, err_blk.err_offset); 2131 } else { 2132 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n"); 2133 } 2134 } 2135 2136 static void 2137 bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2138 { 2139 struct nvme_bdev_io *bio = ref; 2140 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2141 2142 if (spdk_nvme_cpl_is_success(cpl)) { 2143 /* Run PI verification for read data buffer. */ 2144 bdev_nvme_verify_pi_error(bdev_io); 2145 } 2146 2147 /* Return original completion status */ 2148 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, 2149 bio->cpl.status.sc); 2150 } 2151 2152 static void 2153 bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl) 2154 { 2155 struct nvme_bdev_io *bio = ref; 2156 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2157 int ret; 2158 2159 if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) { 2160 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n", 2161 cpl->status.sct, cpl->status.sc); 2162 2163 /* Save completion status to use after verifying PI error. */ 2164 bio->cpl = *cpl; 2165 2166 /* Read without PI checking to verify PI error. */ 2167 ret = bdev_nvme_no_pi_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, 2168 spdk_bdev_io_get_io_channel(bdev_io), 2169 bio, 2170 bdev_io->u.bdev.iovs, 2171 bdev_io->u.bdev.iovcnt, 2172 bdev_io->u.bdev.md_buf, 2173 bdev_io->u.bdev.num_blocks, 2174 bdev_io->u.bdev.offset_blocks); 2175 if (ret == 0) { 2176 return; 2177 } 2178 } 2179 2180 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2181 } 2182 2183 static void 2184 bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2185 { 2186 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2187 2188 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2189 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n", 2190 cpl->status.sct, cpl->status.sc); 2191 /* Run PI verification for write data buffer if PI error is detected. */ 2192 bdev_nvme_verify_pi_error(bdev_io); 2193 } 2194 2195 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2196 } 2197 2198 static void 2199 bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2200 { 2201 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2202 2203 if (spdk_nvme_cpl_is_pi_error(cpl)) { 2204 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n", 2205 cpl->status.sct, cpl->status.sc); 2206 /* Run PI verification for compare data buffer if PI error is detected. */ 2207 bdev_nvme_verify_pi_error(bdev_io); 2208 } 2209 2210 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2211 } 2212 2213 static void 2214 bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) 2215 { 2216 struct nvme_bdev_io *bio = ref; 2217 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2218 2219 /* Compare operation completion */ 2220 if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) { 2221 /* Save compare result for write callback */ 2222 bio->cpl = *cpl; 2223 return; 2224 } 2225 2226 /* Write operation completion */ 2227 if (spdk_nvme_cpl_is_error(&bio->cpl)) { 2228 /* If bio->cpl is already an error, it means the compare operation failed. In that case, 2229 * complete the IO with the compare operation's status. 2230 */ 2231 if (!spdk_nvme_cpl_is_error(cpl)) { 2232 SPDK_ERRLOG("Unexpected write success after compare failure.\n"); 2233 } 2234 2235 spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2236 } else { 2237 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2238 } 2239 } 2240 2241 static void 2242 bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) 2243 { 2244 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); 2245 2246 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); 2247 } 2248 2249 static void 2250 bdev_nvme_admin_passthru_completion(void *ctx) 2251 { 2252 struct nvme_bdev_io *bio = ctx; 2253 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2254 2255 spdk_bdev_io_complete_nvme_status(bdev_io, 2256 bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc); 2257 } 2258 2259 static void 2260 bdev_nvme_abort_completion(void *ctx) 2261 { 2262 struct nvme_bdev_io *bio = ctx; 2263 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2264 2265 if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) { 2266 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 2267 } else { 2268 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 2269 } 2270 } 2271 2272 static void 2273 bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl) 2274 { 2275 struct nvme_bdev_io *bio = ref; 2276 2277 bio->cpl = *cpl; 2278 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio); 2279 } 2280 2281 static void 2282 bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) 2283 { 2284 struct nvme_bdev_io *bio = ref; 2285 2286 bio->cpl = *cpl; 2287 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); 2288 } 2289 2290 static void 2291 bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) 2292 { 2293 struct nvme_bdev_io *bio = ref; 2294 struct iovec *iov; 2295 2296 bio->iov_offset = sgl_offset; 2297 for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { 2298 iov = &bio->iovs[bio->iovpos]; 2299 if (bio->iov_offset < iov->iov_len) { 2300 break; 2301 } 2302 2303 bio->iov_offset -= iov->iov_len; 2304 } 2305 } 2306 2307 static int 2308 bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) 2309 { 2310 struct nvme_bdev_io *bio = ref; 2311 struct iovec *iov; 2312 2313 assert(bio->iovpos < bio->iovcnt); 2314 2315 iov = &bio->iovs[bio->iovpos]; 2316 2317 *address = iov->iov_base; 2318 *length = iov->iov_len; 2319 2320 if (bio->iov_offset) { 2321 assert(bio->iov_offset <= iov->iov_len); 2322 *address += bio->iov_offset; 2323 *length -= bio->iov_offset; 2324 } 2325 2326 bio->iov_offset += *length; 2327 if (bio->iov_offset == iov->iov_len) { 2328 bio->iovpos++; 2329 bio->iov_offset = 0; 2330 } 2331 2332 return 0; 2333 } 2334 2335 static void 2336 bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset) 2337 { 2338 struct nvme_bdev_io *bio = ref; 2339 struct iovec *iov; 2340 2341 bio->fused_iov_offset = sgl_offset; 2342 for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) { 2343 iov = &bio->fused_iovs[bio->fused_iovpos]; 2344 if (bio->fused_iov_offset < iov->iov_len) { 2345 break; 2346 } 2347 2348 bio->fused_iov_offset -= iov->iov_len; 2349 } 2350 } 2351 2352 static int 2353 bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length) 2354 { 2355 struct nvme_bdev_io *bio = ref; 2356 struct iovec *iov; 2357 2358 assert(bio->fused_iovpos < bio->fused_iovcnt); 2359 2360 iov = &bio->fused_iovs[bio->fused_iovpos]; 2361 2362 *address = iov->iov_base; 2363 *length = iov->iov_len; 2364 2365 if (bio->fused_iov_offset) { 2366 assert(bio->fused_iov_offset <= iov->iov_len); 2367 *address += bio->fused_iov_offset; 2368 *length -= bio->fused_iov_offset; 2369 } 2370 2371 bio->fused_iov_offset += *length; 2372 if (bio->fused_iov_offset == iov->iov_len) { 2373 bio->fused_iovpos++; 2374 bio->fused_iov_offset = 0; 2375 } 2376 2377 return 0; 2378 } 2379 2380 static int 2381 bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2382 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2383 void *md, uint64_t lba_count, uint64_t lba) 2384 { 2385 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2386 int rc; 2387 2388 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx without PI check\n", 2389 lba_count, lba); 2390 2391 bio->iovs = iov; 2392 bio->iovcnt = iovcnt; 2393 bio->iovpos = 0; 2394 bio->iov_offset = 0; 2395 2396 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2397 bdev_nvme_no_pi_readv_done, bio, 0, 2398 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2399 md, 0, 0); 2400 2401 if (rc != 0 && rc != -ENOMEM) { 2402 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc); 2403 } 2404 return rc; 2405 } 2406 2407 static int 2408 bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2409 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, 2410 void *md, uint64_t lba_count, uint64_t lba) 2411 { 2412 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2413 int rc; 2414 2415 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n", 2416 lba_count, lba); 2417 2418 bio->iovs = iov; 2419 bio->iovcnt = iovcnt; 2420 bio->iovpos = 0; 2421 bio->iov_offset = 0; 2422 2423 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2424 bdev_nvme_readv_done, bio, nbdev->disk.dif_check_flags, 2425 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2426 md, 0, 0); 2427 2428 if (rc != 0 && rc != -ENOMEM) { 2429 SPDK_ERRLOG("readv failed: rc = %d\n", rc); 2430 } 2431 return rc; 2432 } 2433 2434 static int 2435 bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2436 struct nvme_bdev_io *bio, 2437 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2438 { 2439 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2440 int rc; 2441 2442 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n", 2443 lba_count, lba); 2444 2445 bio->iovs = iov; 2446 bio->iovcnt = iovcnt; 2447 bio->iovpos = 0; 2448 bio->iov_offset = 0; 2449 2450 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2451 bdev_nvme_writev_done, bio, nbdev->disk.dif_check_flags, 2452 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2453 md, 0, 0); 2454 2455 if (rc != 0 && rc != -ENOMEM) { 2456 SPDK_ERRLOG("writev failed: rc = %d\n", rc); 2457 } 2458 return rc; 2459 } 2460 2461 static int 2462 bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2463 struct nvme_bdev_io *bio, 2464 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2465 { 2466 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2467 int rc; 2468 2469 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare %lu blocks with offset %#lx\n", 2470 lba_count, lba); 2471 2472 bio->iovs = iov; 2473 bio->iovcnt = iovcnt; 2474 bio->iovpos = 0; 2475 bio->iov_offset = 0; 2476 2477 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2478 bdev_nvme_comparev_done, bio, nbdev->disk.dif_check_flags, 2479 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, 2480 md, 0, 0); 2481 2482 if (rc != 0 && rc != -ENOMEM) { 2483 SPDK_ERRLOG("comparev failed: rc = %d\n", rc); 2484 } 2485 return rc; 2486 } 2487 2488 static int 2489 bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2490 struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov, 2491 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba) 2492 { 2493 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2494 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2495 uint32_t flags = nbdev->disk.dif_check_flags; 2496 int rc; 2497 2498 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare and write %lu blocks with offset %#lx\n", 2499 lba_count, lba); 2500 2501 bio->iovs = cmp_iov; 2502 bio->iovcnt = cmp_iovcnt; 2503 bio->iovpos = 0; 2504 bio->iov_offset = 0; 2505 bio->fused_iovs = write_iov; 2506 bio->fused_iovcnt = write_iovcnt; 2507 bio->fused_iovpos = 0; 2508 bio->fused_iov_offset = 0; 2509 2510 if (bdev_io->num_retries == 0) { 2511 bio->first_fused_submitted = false; 2512 } 2513 2514 if (!bio->first_fused_submitted) { 2515 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2516 memset(&bio->cpl, 0, sizeof(bio->cpl)); 2517 2518 rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2519 bdev_nvme_comparev_and_writev_done, bio, flags, 2520 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0); 2521 if (rc == 0) { 2522 bio->first_fused_submitted = true; 2523 flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST; 2524 } else { 2525 if (rc != -ENOMEM) { 2526 SPDK_ERRLOG("compare failed: rc = %d\n", rc); 2527 } 2528 return rc; 2529 } 2530 } 2531 2532 flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND; 2533 2534 rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count, 2535 bdev_nvme_comparev_and_writev_done, bio, flags, 2536 bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0); 2537 if (rc != 0 && rc != -ENOMEM) { 2538 SPDK_ERRLOG("write failed: rc = %d\n", rc); 2539 rc = 0; 2540 } 2541 2542 return rc; 2543 } 2544 2545 static int 2546 bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2547 struct nvme_bdev_io *bio, 2548 uint64_t offset_blocks, 2549 uint64_t num_blocks) 2550 { 2551 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2552 struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; 2553 struct spdk_nvme_dsm_range *range; 2554 uint64_t offset, remaining; 2555 uint64_t num_ranges_u64; 2556 uint16_t num_ranges; 2557 int rc; 2558 2559 num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / 2560 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2561 if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { 2562 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); 2563 return -EINVAL; 2564 } 2565 num_ranges = (uint16_t)num_ranges_u64; 2566 2567 offset = offset_blocks; 2568 remaining = num_blocks; 2569 range = &dsm_ranges[0]; 2570 2571 /* Fill max-size ranges until the remaining blocks fit into one range */ 2572 while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { 2573 range->attributes.raw = 0; 2574 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2575 range->starting_lba = offset; 2576 2577 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2578 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; 2579 range++; 2580 } 2581 2582 /* Final range describes the remaining blocks */ 2583 range->attributes.raw = 0; 2584 range->length = remaining; 2585 range->starting_lba = offset; 2586 2587 rc = spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, 2588 SPDK_NVME_DSM_ATTR_DEALLOCATE, 2589 dsm_ranges, num_ranges, 2590 bdev_nvme_queued_done, bio); 2591 2592 return rc; 2593 } 2594 2595 static int 2596 bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2597 struct nvme_bdev_io *bio, 2598 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2599 { 2600 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2601 2602 if (nbytes > max_xfer_size) { 2603 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2604 return -EINVAL; 2605 } 2606 2607 bio->orig_thread = spdk_io_channel_get_thread(ch); 2608 2609 return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_bdev_ctrlr->ctrlr, cmd, buf, 2610 (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); 2611 } 2612 2613 static int 2614 bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2615 struct nvme_bdev_io *bio, 2616 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) 2617 { 2618 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2619 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2620 2621 if (nbytes > max_xfer_size) { 2622 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2623 return -EINVAL; 2624 } 2625 2626 /* 2627 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2628 * so fill it out automatically. 2629 */ 2630 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2631 2632 return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2633 (uint32_t)nbytes, bdev_nvme_queued_done, bio); 2634 } 2635 2636 static int 2637 bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2638 struct nvme_bdev_io *bio, 2639 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len) 2640 { 2641 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2642 size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->nvme_ns->ns); 2643 uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr); 2644 2645 if (nbytes > max_xfer_size) { 2646 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); 2647 return -EINVAL; 2648 } 2649 2650 if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns)) { 2651 SPDK_ERRLOG("invalid meta data buffer size\n"); 2652 return -EINVAL; 2653 } 2654 2655 /* 2656 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, 2657 * so fill it out automatically. 2658 */ 2659 cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns); 2660 2661 return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, 2662 (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio); 2663 } 2664 2665 static void 2666 bdev_nvme_abort_admin_cmd(void *ctx) 2667 { 2668 struct nvme_bdev_io *bio = ctx; 2669 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); 2670 struct nvme_bdev *nbdev; 2671 struct nvme_bdev_io *bio_to_abort; 2672 int rc; 2673 2674 nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt; 2675 bio_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx; 2676 2677 rc = spdk_nvme_ctrlr_cmd_abort_ext(nbdev->nvme_bdev_ctrlr->ctrlr, 2678 NULL, 2679 bio_to_abort, 2680 bdev_nvme_abort_done, bio); 2681 if (rc == -ENOENT) { 2682 /* If no admin command was found in admin qpair, complete the abort 2683 * request with failure. 2684 */ 2685 bio->cpl.cdw0 |= 1U; 2686 bio->cpl.status.sc = SPDK_NVME_SC_SUCCESS; 2687 bio->cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2688 2689 spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio); 2690 } 2691 } 2692 2693 static int 2694 bdev_nvme_abort(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, 2695 struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort) 2696 { 2697 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); 2698 int rc; 2699 2700 bio->orig_thread = spdk_io_channel_get_thread(ch); 2701 2702 rc = spdk_nvme_ctrlr_cmd_abort_ext(nbdev->nvme_bdev_ctrlr->ctrlr, 2703 nvme_ch->qpair, 2704 bio_to_abort, 2705 bdev_nvme_abort_done, bio); 2706 if (rc == -ENOENT) { 2707 /* If no command was found in I/O qpair, the target command may be 2708 * admin command. Only a single thread tries aborting admin command 2709 * to clean I/O flow. 2710 */ 2711 spdk_thread_send_msg(nbdev->nvme_bdev_ctrlr->thread, 2712 bdev_nvme_abort_admin_cmd, bio); 2713 rc = 0; 2714 } 2715 2716 return rc; 2717 } 2718 2719 static void 2720 bdev_nvme_get_spdk_running_config(FILE *fp) 2721 { 2722 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2723 2724 fprintf(fp, "\n[Nvme]"); 2725 fprintf(fp, "\n" 2726 "# NVMe Device Whitelist\n" 2727 "# Users may specify which NVMe devices to claim by their transport id.\n" 2728 "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n" 2729 "# The second argument is the assigned name, which can be referenced from\n" 2730 "# other sections in the configuration file. For NVMe devices, a namespace\n" 2731 "# is automatically appended to each name in the format <YourName>nY, where\n" 2732 "# Y is the NSID (starts at 1).\n"); 2733 2734 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2735 const char *trtype; 2736 const char *prchk_flags; 2737 2738 trtype = spdk_nvme_transport_id_trtype_str(nvme_bdev_ctrlr->trid->trtype); 2739 if (!trtype) { 2740 continue; 2741 } 2742 2743 if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 2744 fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n", 2745 trtype, 2746 nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->name); 2747 } else { 2748 const char *adrfam; 2749 2750 adrfam = spdk_nvme_transport_id_adrfam_str(nvme_bdev_ctrlr->trid->adrfam); 2751 prchk_flags = spdk_nvme_prchk_flags_str(nvme_bdev_ctrlr->prchk_flags); 2752 2753 if (adrfam) { 2754 fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2755 trtype, adrfam, 2756 nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->trid->trsvcid, 2757 nvme_bdev_ctrlr->trid->subnqn, nvme_bdev_ctrlr->name); 2758 } else { 2759 fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s", 2760 trtype, 2761 nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->trid->trsvcid, 2762 nvme_bdev_ctrlr->trid->subnqn, nvme_bdev_ctrlr->name); 2763 } 2764 2765 if (prchk_flags) { 2766 fprintf(fp, " \"%s\"\n", prchk_flags); 2767 } else { 2768 fprintf(fp, "\n"); 2769 } 2770 } 2771 } 2772 2773 fprintf(fp, "\n" 2774 "# The number of attempts per I/O when an I/O fails. Do not include\n" 2775 "# this key to get the default behavior.\n"); 2776 fprintf(fp, "RetryCount %d\n", g_opts.retry_count); 2777 fprintf(fp, "\n" 2778 "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n"); 2779 fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us); 2780 2781 fprintf(fp, "\n" 2782 "# Action to take on command time out. Only valid when Timeout is greater\n" 2783 "# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort\n" 2784 "# the command, or 'None' to just print a message but do nothing.\n" 2785 "# Admin command timeouts will always result in a reset.\n"); 2786 switch (g_opts.action_on_timeout) { 2787 case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: 2788 fprintf(fp, "ActionOnTimeout None\n"); 2789 break; 2790 case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: 2791 fprintf(fp, "ActionOnTimeout Reset\n"); 2792 break; 2793 case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: 2794 fprintf(fp, "ActionOnTimeout Abort\n"); 2795 break; 2796 } 2797 2798 fprintf(fp, "\n" 2799 "# Set how often the admin queue is polled for asynchronous events.\n" 2800 "# Units in microseconds.\n"); 2801 fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us); 2802 fprintf(fp, "IOPollRate %" PRIu64"\n", g_opts.nvme_ioq_poll_period_us); 2803 fprintf(fp, "\n" 2804 "# Disable handling of hotplug (runtime insert and remove) events,\n" 2805 "# users can set to Yes if want to enable it.\n" 2806 "# Default: No\n"); 2807 fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No"); 2808 fprintf(fp, "\n" 2809 "# Set how often the hotplug is processed for insert and remove events." 2810 "# Units in microseconds.\n"); 2811 fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us); 2812 if (g_nvme_hostnqn) { 2813 fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn); 2814 } 2815 fprintf(fp, "DelayCmdSubmit %s\n", g_opts.delay_cmd_submit ? "True" : "False"); 2816 2817 fprintf(fp, "\n"); 2818 } 2819 2820 static void 2821 nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2822 { 2823 /* nop */ 2824 } 2825 2826 static void 2827 nvme_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns) 2828 { 2829 g_config_json_namespace_fn[ns->type](w, ns); 2830 } 2831 2832 static int 2833 bdev_nvme_config_json(struct spdk_json_write_ctx *w) 2834 { 2835 struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; 2836 struct spdk_nvme_transport_id *trid; 2837 const char *action; 2838 uint32_t nsid; 2839 2840 if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) { 2841 action = "reset"; 2842 } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) { 2843 action = "abort"; 2844 } else { 2845 action = "none"; 2846 } 2847 2848 spdk_json_write_object_begin(w); 2849 2850 spdk_json_write_named_string(w, "method", "bdev_nvme_set_options"); 2851 2852 spdk_json_write_named_object_begin(w, "params"); 2853 spdk_json_write_named_string(w, "action_on_timeout", action); 2854 spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us); 2855 spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count); 2856 spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst); 2857 spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight); 2858 spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight); 2859 spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight); 2860 spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us); 2861 spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us); 2862 spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests); 2863 spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit); 2864 spdk_json_write_object_end(w); 2865 2866 spdk_json_write_object_end(w); 2867 2868 pthread_mutex_lock(&g_bdev_nvme_mutex); 2869 TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { 2870 trid = nvme_bdev_ctrlr->trid; 2871 2872 spdk_json_write_object_begin(w); 2873 2874 spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller"); 2875 2876 spdk_json_write_named_object_begin(w, "params"); 2877 spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name); 2878 nvme_bdev_dump_trid_json(trid, w); 2879 spdk_json_write_named_bool(w, "prchk_reftag", 2880 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0); 2881 spdk_json_write_named_bool(w, "prchk_guard", 2882 (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0); 2883 2884 spdk_json_write_object_end(w); 2885 2886 spdk_json_write_object_end(w); 2887 2888 for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) { 2889 if (!nvme_bdev_ctrlr->namespaces[nsid]->populated) { 2890 continue; 2891 } 2892 2893 nvme_namespace_config_json(w, nvme_bdev_ctrlr->namespaces[nsid]); 2894 } 2895 } 2896 2897 /* Dump as last parameter to give all NVMe bdevs chance to be constructed 2898 * before enabling hotplug poller. 2899 */ 2900 spdk_json_write_object_begin(w); 2901 spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug"); 2902 2903 spdk_json_write_named_object_begin(w, "params"); 2904 spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us); 2905 spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled); 2906 spdk_json_write_object_end(w); 2907 2908 spdk_json_write_object_end(w); 2909 2910 pthread_mutex_unlock(&g_bdev_nvme_mutex); 2911 return 0; 2912 } 2913 2914 struct spdk_nvme_ctrlr * 2915 bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) 2916 { 2917 if (!bdev || bdev->module != &nvme_if) { 2918 return NULL; 2919 } 2920 2921 return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_bdev_ctrlr->ctrlr; 2922 } 2923 2924 SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME) 2925