/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/config.h"
#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "spdk/env.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver *g_spdk_nvme_driver;
pid_t g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

static void
nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
}

static int
nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
			struct nvme_ctrlr_detach_ctx **_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx;
	int ref_count;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
	assert(ref_count > 0);

	if (ref_count == 1) {
		/* This is the last reference to the controller, so we need to
		 * allocate a context to destruct it.
		 */
		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

			return -ENOMEM;
		}
		ctx->ctrlr = ctrlr;
		ctx->cb_fn = nvme_ctrlr_detach_async_finish;

		nvme_ctrlr_proc_put_ref(ctrlr);

		nvme_io_msg_ctrlr_detach(ctrlr);

		nvme_ctrlr_destruct_async(ctrlr, ctx);

		*_ctx = ctx;
	} else {
		nvme_ctrlr_proc_put_ref(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static int
nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

	free(ctx);

	return rc;
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0) {
		return rc;
	} else if (ctx == NULL) {
		/* The ctrlr was detached in the calling process, but other
		 * processes are still attached to it.
		 */
		return 0;
	}

	while (1) {
		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc != -EAGAIN) {
			break;
		}
		nvme_delay(1000);
	}

	return 0;
}

int
spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
		       struct spdk_nvme_detach_ctx **_detach_ctx)
{
	struct spdk_nvme_detach_ctx *detach_ctx;
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	if (ctrlr == NULL || _detach_ctx == NULL) {
		return -EINVAL;
	}

	/* Use a context header to poll detachment for multiple controllers.
	 * Allocate a new one if not allocated yet, or use the passed one otherwise.
	 */
	detach_ctx = *_detach_ctx;
	if (detach_ctx == NULL) {
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			return -ENOMEM;
		}
		TAILQ_INIT(&detach_ctx->head);
	}

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0 || ctx == NULL) {
		/* If this detach failed and the context header is empty, it means we just
		 * allocated the header and need to free it before returning.
		 */
		if (TAILQ_EMPTY(&detach_ctx->head)) {
			free(detach_ctx);
		}
		return rc;
	}

	/* Append a context for this detachment to the context header. */
	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);

	*_detach_ctx = detach_ctx;

	return 0;
}

int
spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
	int rc;

	if (detach_ctx == NULL) {
		return -EINVAL;
	}

	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
		TAILQ_REMOVE(&detach_ctx->head, ctx, link);

		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc == -EAGAIN) {
			/* This detachment is still in progress, so keep polling it.
			 * (For any other rc, ctx was freed by nvme_ctrlr_detach_poll_async().)
			 */
			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
		}
	}

	if (!TAILQ_EMPTY(&detach_ctx->head)) {
		return -EAGAIN;
	}

	free(detach_ctx);
	return 0;
}

void
spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
{
	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
		;
	}
}
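
/*
 * Example: detaching multiple controllers with a single detach context.
 *
 * A hypothetical usage sketch (the controller handles and their count are
 * assumed to come from the caller); it is not part of the driver itself.
 * spdk_nvme_detach_async() accumulates per-controller contexts under one
 * spdk_nvme_detach_ctx, and spdk_nvme_detach_poll() polls until every
 * detachment has completed.
 *
 *	static void
 *	example_detach_all(struct spdk_nvme_ctrlr **ctrlrs, size_t num_ctrlrs)
 *	{
 *		struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *		size_t i;
 *
 *		for (i = 0; i < num_ctrlrs; i++) {
 *			if (spdk_nvme_detach_async(ctrlrs[i], &detach_ctx) != 0) {
 *				SPDK_ERRLOG("failed to start detaching controller %zu\n", i);
 *			}
 *		}
 *
 *		if (detach_ctx != NULL) {
 *			spdk_nvme_detach_poll(detach_ctx);
 *		}
 *	}
 */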
void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status *status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request, free allocated memory */
		spdk_free(status->dma_data);
		free(status);
		return;
	}

	/*
	 * Copy status into the argument passed by the caller, so that
	 * the caller can check the status to determine if the
	 * request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

int
nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex)
{
	int rc;

	if (robust_mutex) {
		nvme_robust_mutex_lock(robust_mutex);
	}

	if (qpair->poll_group) {
		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
				dummy_disconnected_qpair_cb);
	} else {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
	}

	if (robust_mutex) {
		nvme_robust_mutex_unlock(robust_mutex);
	}

	if (rc < 0) {
		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
		goto error;
	}

	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
		goto error;
	}

	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto error;
		}
	}

	if (!status->done) {
		return -EAGAIN;
	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
		return -EIO;
	} else {
		return 0;
	}
error:
	/* Either a transport error occurred or we've timed out. Either way, if the response hasn't
	 * been received yet, mark the command as timed out, so the status gets freed when the
	 * command is completed or aborted.
	 */
	if (!status->done) {
		status->timed_out = true;
	}

	return -ECANCELED;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock_timeout(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex,
	uint64_t timeout_in_usecs)
{
	int rc;

	if (timeout_in_usecs) {
		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
	} else {
		status->timeout_tsc = 0;
	}

	status->cpl.status_raw = 0;
	do {
		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
	} while (rc == -EAGAIN);

	return rc;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
}

int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_usecs)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
}
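
/*
 * Example: the internal pattern for synchronously waiting on a command.
 *
 * A hypothetical sketch of how driver code typically pairs
 * nvme_completion_poll_cb() with nvme_wait_for_completion(); the specific
 * command (an admin Keep Alive submitted via spdk_nvme_ctrlr_cmd_admin_raw())
 * is just an illustration. The status structure must be zeroed before
 * submission, and if the wait times out it is freed later by
 * nvme_completion_poll_cb() itself, so the caller only frees it when
 * timed_out is false.
 *
 *	static int
 *	example_send_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
 *	{
 *		struct nvme_completion_poll_status *status;
 *		struct spdk_nvme_cmd cmd = {};
 *		int rc;
 *
 *		status = calloc(1, sizeof(*status));
 *		if (status == NULL) {
 *			return -ENOMEM;
 *		}
 *
 *		cmd.opc = SPDK_NVME_OPC_KEEP_ALIVE;
 *		rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *						   nvme_completion_poll_cb, status);
 *		if (rc != 0) {
 *			free(status);
 *			return rc;
 *		}
 *
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *		if (!status->timed_out) {
 *			free(status);
 *		}
 *		return rc;
 *	}
 */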
static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	spdk_nvme_cmd_cb user_cb_fn;
	void *user_cb_arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}
	}

	user_cb_fn = req->user_cb_fn;
	user_cb_arg = req->user_cb_arg;
	nvme_cleanup_user_req(req);

	/* Call the user's original callback now that the buffer has been copied */
	user_cb_fn(user_cb_arg, cpl);
}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}
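
/*
 * Example: how an admin command wrapper typically uses
 * nvme_allocate_request_user_copy().
 *
 * A hypothetical sketch, loosely modeled on the existing log page helpers;
 * the function name and the omission of the remaining cdw10/cdw11 fields are
 * assumptions for illustration. The caller's buffer is shadowed by a
 * DMA-able bounce buffer, and nvme_user_copy_cmd_complete() copies data back
 * and invokes cb_fn on completion.
 *
 *	static int
 *	example_get_log_page(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page,
 *			     void *payload, uint32_t payload_size,
 *			     spdk_nvme_cmd_cb cb_fn, void *cb_arg)
 *	{
 *		struct nvme_request *req;
 *		struct spdk_nvme_cmd *cmd;
 *
 *		req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size,
 *						      cb_fn, cb_arg, false);
 *		if (req == NULL) {
 *			return -ENOMEM;
 *		}
 *
 *		cmd = &req->cmd;
 *		cmd->opc = SPDK_NVME_OPC_GET_LOG_PAGE;
 *		cmd->cdw10_bits.get_log_page.lid = log_page;
 *
 *		return nvme_ctrlr_submit_admin_request(ctrlr, req);
 *	}
 */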
/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}
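
/*
 * Example: how a transport's timeout poller might use
 * nvme_request_check_timeout().
 *
 * A hypothetical sketch; the tracker structure and its outstanding list are
 * assumptions standing in for transport-specific state. Because trackers are
 * scanned in submission order, the scan can stop at the first request that
 * has not yet reached its deadline (return value 1).
 *
 *	struct example_tracker {
 *		struct nvme_request *req;
 *		uint16_t cid;
 *		TAILQ_ENTRY(example_tracker) tailq;
 *	};
 *	TAILQ_HEAD(example_tracker_list, example_tracker);
 *
 *	static void
 *	example_qpair_check_timeout(struct spdk_nvme_ctrlr_process *active_proc,
 *				    struct example_tracker_list *outstanding)
 *	{
 *		struct example_tracker *tr, *tmp;
 *		uint64_t now = spdk_get_ticks();
 *
 *		TAILQ_FOREACH_SAFE(tr, outstanding, tailq, tmp) {
 *			if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, now) == 1) {
 *				break;
 *			}
 *		}
 *	}
 */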
int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}

int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread. Once that object is established and its
	 * mutex is initialized, we can unlock this mutex and use that
	 * one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 * initialization.
	 * The secondary process will lookup the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone was already reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), SPDK_ENV_NUMA_ID_ANY,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to finish initializing. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000);	/* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}
/* This function must only be called while holding g_spdk_nvme_driver->lock */
int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, opts.hostnqn);
		if (ctrlr) {
			/* This ctrlr already exists. */

			if (ctrlr->is_destructed) {
				/* This ctrlr is being destructed asynchronously. */
				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
					    trid->traddr);
				probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, trid, -EBUSY);
				return -EBUSY;
			}

			/* Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately. */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, trid, -ENODEV);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;
	struct nvme_ctrlr_detach_ctx *detach_ctx;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, &ctrlr->trid, rc);
		nvme_ctrlr_lock(ctrlr);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_ctrlr_unlock(ctrlr);

		/* allocate a context to detach this controller asynchronously */
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			SPDK_WARNLOG("Failed to allocate asynchronous detach context. Performing synchronous destruct.\n");
			nvme_ctrlr_destruct(ctrlr);
			return;
		}
		detach_ctx->ctrlr = ctrlr;
		TAILQ_INSERT_TAIL(&probe_ctx->failed_ctxs.head, detach_ctx, link);
		nvme_ctrlr_destruct_async(ctrlr, detach_ctx);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 * Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}
}
static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}

	return rc;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, hostnqn);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
			continue;
		}
		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
			continue;
		}
		return ctrlr;
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
			continue;
		}
		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
			continue;
		}
		return ctrlr;
	}

	return NULL;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock;
 * it takes the lock itself. */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
	const struct spdk_nvme_ctrlr_opts *opts = probe_ctx->opts;

	if (strlen(probe_ctx->trid.trstring) == 0) {
		/* If the user didn't provide a trstring, derive it from the trtype */
		spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	}

	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u (%s) not available\n",
			    probe_ctx->trid.trtype, probe_ctx->trid.trstring);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, &ctrlr->trid, -EFAULT);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			if (opts && strcmp(opts->hostnqn, ctrlr->opts.hostnqn) != 0) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 * that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static void
nvme_dummy_attach_fail_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
			  int rc)
{
	SPDK_ERRLOG("Failed to attach nvme ctrlr: trtype=%s adrfam=%s traddr=%s trsvcid=%s "
		    "subnqn=%s, %s\n", spdk_nvme_transport_id_trtype_str(trid->trtype),
		    spdk_nvme_transport_id_adrfam_str(trid->adrfam), trid->traddr, trid->trsvcid,
		    trid->subnqn, spdk_strerror(-rc));
}
static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    const struct spdk_nvme_ctrlr_opts *opts,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_attach_fail_cb attach_fail_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->opts = opts;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	if (attach_fail_cb != NULL) {
		probe_ctx->attach_fail_cb = attach_fail_cb;
	} else {
		probe_ctx->attach_fail_cb = nvme_dummy_attach_fail_cb;
	}
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
	TAILQ_INIT(&probe_ctx->failed_ctxs.head);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	return spdk_nvme_probe_ext(trid, cb_ctx, probe_cb, attach_cb, NULL, remove_cb);
}

int
spdk_nvme_probe_ext(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_attach_fail_cb attach_fail_cb, spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async_ext(trid, cb_ctx, probe_cb,
					      attach_cb, attach_fail_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 * but maintain the value of rc to signal errors when we return.
	 */
	return nvme_init_controllers(probe_ctx);
}
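
/*
 * Example: attaching to all local PCIe controllers with spdk_nvme_probe().
 *
 * A hypothetical application-side sketch; the callback names and the printf
 * bookkeeping are placeholders. Passing a NULL trid defaults to probing the
 * PCIe transport, returning true from probe_cb attaches to every controller
 * found, and attach_cb runs once each controller has finished initializing.
 *
 *	static bool
 *	example_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("probing %s\n", trid->traddr);
 *		return true;
 *	}
 *
 *	static void
 *	example_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("attached to %s\n", trid->traddr);
 *	}
 *
 *	static int
 *	example_probe_all(void)
 *	{
 *		if (spdk_nvme_probe(NULL, NULL, example_probe_cb, example_attach_cb, NULL) != 0) {
 *			fprintf(stderr, "spdk_nvme_probe() failed\n");
 *			return -1;
 *		}
 *		return 0;
 *	}
 */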
static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = opts_user->field; \
	}

#define SET_FIELD_ARRAY(field) \
	if (FIELD_OK(field)) { \
		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
	}

	SET_FIELD(num_io_queues);
	SET_FIELD(use_cmb_sqs);
	SET_FIELD(no_shn_notification);
	SET_FIELD(enable_interrupts);
	SET_FIELD(arb_mechanism);
	SET_FIELD(arbitration_burst);
	SET_FIELD(low_priority_weight);
	SET_FIELD(medium_priority_weight);
	SET_FIELD(high_priority_weight);
	SET_FIELD(keep_alive_timeout_ms);
	SET_FIELD(transport_retry_count);
	SET_FIELD(io_queue_size);
	SET_FIELD_ARRAY(hostnqn);
	SET_FIELD(io_queue_requests);
	SET_FIELD_ARRAY(src_addr);
	SET_FIELD_ARRAY(src_svcid);
	SET_FIELD_ARRAY(host_id);
	SET_FIELD_ARRAY(extended_host_id);
	SET_FIELD(command_set);
	SET_FIELD(admin_timeout_ms);
	SET_FIELD(header_digest);
	SET_FIELD(data_digest);
	SET_FIELD(disable_error_logging);
	SET_FIELD(transport_ack_timeout);
	SET_FIELD(admin_queue_size);
	SET_FIELD(fabrics_connect_timeout_us);
	SET_FIELD(disable_read_ana_log_page);
	SET_FIELD(disable_read_changed_ns_list_log_page);
	SET_FIELD(tls_psk);
	SET_FIELD(dhchap_key);
	SET_FIELD(dhchap_ctrlr_key);
	SET_FIELD(dhchap_digests);
	SET_FIELD(dhchap_dhgroups);

#undef FIELD_OK
#undef SET_FIELD
#undef SET_FIELD_ARRAY
}

struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;
	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	nvme_get_default_hostnqn(hostnqn, sizeof(hostnqn));
	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
		memcpy(hostnqn, opts_local.hostnqn, sizeof(hostnqn));
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid, hostnqn);

	return ctrlr;
}
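
/*
 * Example: connecting to a single controller with spdk_nvme_connect().
 *
 * A hypothetical application-side sketch; the transport ID string is an
 * arbitrary NVMe-oF/TCP target used only for illustration. Options are
 * initialized to their defaults first so that the opts_size-based
 * compatibility handling in nvme_ctrlr_opts_init() works as intended, then
 * individual fields are overridden.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	if (spdk_nvme_transport_id_parse(&trid,
 *			"trtype:TCP adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 "
 *			"subnqn:nqn.2016-06.io.spdk:cnode1") != 0) {
 *		return;
 *	}
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 5000;
 *
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 *	if (ctrlr == NULL) {
 *		SPDK_ERRLOG("spdk_nvme_connect() failed\n");
 *	}
 */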
void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring;

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
		break;
	default:
		SPDK_ERRLOG("no available transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}

int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int i = 0;

	if (trid == NULL || trstring == NULL) {
		return -EINVAL;
	}

	/* Note: gcc-11 has some false positive -Wstringop-overread warnings with LTO builds if we
	 * use strnlen here. So do the trstring copy manually instead. See GitHub issue #2391.
	 */

	/* Copy the uppercased version of the input into the official trstring. */
	while (i < SPDK_NVMF_TRSTRING_MAX_LEN && trstring[i] != 0) {
		trid->trstring[i] = toupper(trstring[i]);
		i++;
	}

	if (trstring[i] != 0) {
		return -EINVAL;
	} else {
		trid->trstring[i] = 0;
		return 0;
	}
}

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else if (strcasecmp(str, "VFIOUSER") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}
const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		return "VFIOUSER";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip key: */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}
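
/*
 * Example: the key/value scanning loop that the parsers below are built on.
 *
 * A hypothetical sketch showing how parse_next_key() walks a
 * whitespace-separated list of "key:value" (or "key=value") pairs; the input
 * string here is arbitrary. A return value of 0 signals either a malformed
 * pair or a key/value that does not fit the provided buffers.
 *
 *	const char *str = "trtype:TCP traddr=192.168.0.10 trsvcid:4420";
 *	char key[32], val[1024];
 *	size_t val_len;
 *
 *	while (*str != '\0') {
 *		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
 *		if (val_len == 0) {
 *			break;
 *		}
 *		printf("%s => %s\n", key, val);
 *	}
 */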
int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "hostnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case. The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. spdk_nvme_perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace. For this
			 * scenario, just silently ignore this key
			 * rather than letting it default to logging
			 * it as an invalid key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return val_len;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);

		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}
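
/*
 * Example: transport ID comparison normalizes PCIe addresses.
 *
 * A hypothetical sketch: the two transport ID strings below differ textually
 * (one omits the PCI domain), but spdk_nvme_transport_id_compare() parses
 * PCIe traddr values into struct spdk_pci_addr first, so they compare equal.
 *
 *	struct spdk_nvme_transport_id a = {}, b = {};
 *
 *	spdk_nvme_transport_id_parse(&a, "trtype:PCIe traddr:0000:01:00.0");
 *	spdk_nvme_transport_id_parse(&b, "trtype:PCIe traddr:01:00.0");
 *
 *	assert(spdk_nvme_transport_id_compare(&a, &b) == 0);
 */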
int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

int
spdk_nvme_scan_attached(const struct spdk_nvme_transport_id *trid)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return rc;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return -ENOMEM;
	}

	nvme_probe_ctx_init(probe_ctx, trid, NULL, NULL, NULL, NULL, NULL, NULL);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	rc = nvme_transport_ctrlr_scan_attached(probe_ctx);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
	free(probe_ctx);

	return rc < 0 ? rc : 0;
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	return spdk_nvme_probe_async_ext(trid, cb_ctx, probe_cb, attach_cb, NULL, remove_cb);
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async_ext(const struct spdk_nvme_transport_id *trid,
			  void *cb_ctx,
			  spdk_nvme_probe_cb probe_cb,
			  spdk_nvme_attach_cb attach_cb,
			  spdk_nvme_attach_fail_cb attach_fail_cb,
			  spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	nvme_probe_ctx_init(probe_ctx, trid, NULL, cb_ctx, probe_cb, attach_cb, attach_fail_cb,
			    remove_cb);
	rc = nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
	struct nvme_ctrlr_detach_ctx *detach_ctx, *detach_ctx_tmp;
	int rc;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	/* Poll the destruction of controllers that failed to initialize. */
	TAILQ_FOREACH_SAFE(detach_ctx, &probe_ctx->failed_ctxs.head, link, detach_ctx_tmp) {
		rc = nvme_ctrlr_destruct_poll_async(detach_ctx->ctrlr, detach_ctx);
		if (rc == -EAGAIN) {
			continue;
		}

		if (rc != 0) {
			SPDK_ERRLOG("Failure while polling the controller destruction (rc = %d)\n", rc);
		}

		TAILQ_REMOVE(&probe_ctx->failed_ctxs.head, detach_ctx, link);
		free(detach_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs) && TAILQ_EMPTY(&probe_ctx->failed_ctxs.head)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return 0;
	}

	return -EAGAIN;
}
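
/*
 * Example: non-blocking probe using the async API.
 *
 * A hypothetical sketch of driving spdk_nvme_probe_async() to completion from
 * an application's poller loop; example_probe_cb, example_attach_cb and
 * do_other_work() are placeholders for application code.
 * spdk_nvme_probe_poll_async() returns -EAGAIN while controllers are still
 * initializing and 0 (freeing the context) once probing has finished.
 *
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *
 *	probe_ctx = spdk_nvme_probe_async(NULL, NULL, example_probe_cb,
 *					  example_attach_cb, NULL);
 *	if (probe_ctx == NULL) {
 *		return -1;
 *	}
 *
 *	while (spdk_nvme_probe_poll_async(probe_ctx) == -EAGAIN) {
 *		do_other_work();
 *	}
 */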
struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, opts, (void *)opts, probe_cb, attach_cb, NULL, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
nvme_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service,
		long int *port)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	if (service != NULL) {
		*port = spdk_strtol(service, 10);
		if (*port <= 0 || *port >= 65536) {
			SPDK_ERRLOG("Invalid port: %s\n", service);
			return -EINVAL;
		}
	}

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return -(abs(ret));
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = -EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}

int
nvme_get_default_hostnqn(char *buf, int len)
{
	char uuid[SPDK_UUID_STRING_LEN];
	int rc;

	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &g_spdk_nvme_driver->default_extended_host_id);
	rc = snprintf(buf, len, "nqn.2014-08.org.nvmexpress:uuid:%s", uuid);
	if (rc < 0 || rc >= len) {
		return -EINVAL;
	}

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(nvme)