/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/config.h"
#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "spdk/env.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver *g_spdk_nvme_driver;
pid_t g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

static void
nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
}

static int
nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
			struct nvme_ctrlr_detach_ctx **_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx;
	int ref_count;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
	assert(ref_count > 0);

	if (ref_count == 1) {
		/* This is the last reference to the controller, so we need to
		 * allocate a context to destruct it.
		 */
		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

			return -ENOMEM;
		}
		ctx->ctrlr = ctrlr;
		ctx->cb_fn = nvme_ctrlr_detach_async_finish;

		nvme_ctrlr_proc_put_ref(ctrlr);

		nvme_io_msg_ctrlr_detach(ctrlr);

		nvme_ctrlr_destruct_async(ctrlr, ctx);

		*_ctx = ctx;
	} else {
		nvme_ctrlr_proc_put_ref(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static int
nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

	free(ctx);

	return rc;
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0) {
		return rc;
	} else if (ctx == NULL) {
		/* ctrlr was detached from the caller process, but another
		 * process still has it attached.
		 */
		return 0;
	}

	while (1) {
		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc != -EAGAIN) {
			break;
		}
		nvme_delay(1000);
	}

	return 0;
}

int
spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
		       struct spdk_nvme_detach_ctx **_detach_ctx)
{
	struct spdk_nvme_detach_ctx *detach_ctx;
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	if (ctrlr == NULL || _detach_ctx == NULL) {
		return -EINVAL;
	}

	/* Use a context header to poll detachment for multiple controllers.
	 * Allocate a new one if not allocated yet, or use the passed one otherwise.
	 */
	detach_ctx = *_detach_ctx;
	if (detach_ctx == NULL) {
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			return -ENOMEM;
		}
		TAILQ_INIT(&detach_ctx->head);
	}

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0 || ctx == NULL) {
		/* If this detach failed and the context header is empty, it means we just
		 * allocated the header and need to free it before returning.
		 */
		if (TAILQ_EMPTY(&detach_ctx->head)) {
			free(detach_ctx);
		}
		return rc;
	}

	/* Append a context for this detachment to the context header. */
	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);

	*_detach_ctx = detach_ctx;

	return 0;
}

int
spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
	int rc;

	if (detach_ctx == NULL) {
		return -EINVAL;
	}

	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
		TAILQ_REMOVE(&detach_ctx->head, ctx, link);

		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc == -EAGAIN) {
			/* If not -EAGAIN, ctx was freed by nvme_ctrlr_detach_poll_async(). */
			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
		}
	}

	if (!TAILQ_EMPTY(&detach_ctx->head)) {
		return -EAGAIN;
	}

	free(detach_ctx);
	return 0;
}

void
spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
{
	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
		;
	}
}
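/*
 * Usage sketch (application side, not part of the driver): detaching several
 * controllers with the asynchronous API above. The controller array and its
 * length are hypothetical caller-side variables; error handling is elided.
 *
 *	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *	int i, rc;
 *
 *	for (i = 0; i < num_ctrlrs; i++) {
 *		rc = spdk_nvme_detach_async(ctrlrs[i], &detach_ctx);
 *		if (rc != 0) {
 *			break;
 *		}
 *	}
 *	// Poll until every queued detachment has finished.
 *	spdk_nvme_detach_poll(detach_ctx);
 */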
void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status *status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request, free allocated memory */
		spdk_free(status->dma_data);
		free(status);
		return;
	}

	/*
	 * Copy status into the argument passed by the caller, so that
	 * the caller can check the status to determine if the
	 * request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

int
nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex)
{
	int rc;

	if (robust_mutex) {
		nvme_robust_mutex_lock(robust_mutex);
	}

	if (qpair->poll_group) {
		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
				dummy_disconnected_qpair_cb);
	} else {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
	}

	if (robust_mutex) {
		nvme_robust_mutex_unlock(robust_mutex);
	}

	if (rc < 0) {
		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
		goto error;
	}

	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
		goto error;
	}

	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto error;
		}
	}

	if (!status->done) {
		return -EAGAIN;
	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
		return -EIO;
	} else {
		return 0;
	}
error:
	/* Either transport error occurred or we've timed out. Either way, if the response hasn't
	 * been received yet, mark the command as timed out, so the status gets freed when the
	 * command is completed or aborted.
	 */
	if (!status->done) {
		status->timed_out = true;
	}

	return -ECANCELED;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock_timeout(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex,
	uint64_t timeout_in_usecs)
{
	int rc;

	if (timeout_in_usecs) {
		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
	} else {
		status->timeout_tsc = 0;
	}

	status->cpl.status_raw = 0;
	do {
		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
	} while (rc == -EAGAIN);

	return rc;
}
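/*
 * Internal usage sketch (illustrative only): how the helpers above are typically
 * paired with nvme_completion_poll_cb when issuing a synchronous admin command.
 * The opcode and command fields are placeholders, not a real command.
 *
 *	struct nvme_completion_poll_status *status;
 *	struct spdk_nvme_cmd cmd = { .opc = SPDK_NVME_OPC_GET_FEATURES };
 *	int rc;
 *
 *	status = calloc(1, sizeof(*status));
 *	if (status == NULL) {
 *		return -ENOMEM;
 *	}
 *	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *					   nvme_completion_poll_cb, status);
 *	if (rc == 0) {
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *	}
 *	if (!status->timed_out) {
 *		free(status);
 *	}
 */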
/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
}

int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_usecs)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
}

static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	spdk_nvme_cmd_cb user_cb_fn;
	void *user_cb_arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}
	}

	user_cb_fn = req->user_cb_fn;
	user_cb_arg = req->user_cb_arg;
	nvme_cleanup_user_req(req);

	/* Call the user's original callback now that the buffer has been copied */
	user_cb_fn(user_cb_arg, cpl);
}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}

/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}

int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}
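/*
 * Illustrative sketch (not the driver's implementation): a robust, process-shared
 * mutex initialized as above lets a surviving process recover the lock if its
 * owner dies while holding it. A lock wrapper along these lines would mark the
 * mutex consistent before continuing; the actual nvme_robust_mutex_lock() is
 * declared in nvme_internal.h and may differ.
 *
 *	static int
 *	example_robust_lock(pthread_mutex_t *mtx)
 *	{
 *		int rc = pthread_mutex_lock(mtx);
 *
 *		if (rc == EOWNERDEAD) {
 *			// Previous owner died; repair the mutex state and keep the lock.
 *			rc = pthread_mutex_consistent(mtx);
 *		}
 *		return rc;
 *	}
 */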
int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread. Once that object is established and its
	 * mutex is initialized, we can unlock this mutex and use that
	 * one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 * initialization.
	 * The secondary process will lookup the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone has already been reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone has already been reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to finish initializing. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
		if (ctrlr) {
			/* This ctrlr already exists. */
			if (ctrlr->is_destructed) {
				/* This ctrlr is being destructed asynchronously. */
				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
					    trid->traddr);
				return -EBUSY;
			}

			/* Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately. */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_ctrlr_lock(ctrlr);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_ctrlr_unlock(ctrlr);
		nvme_ctrlr_destruct(ctrlr);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 * Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}
}

static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}

	return rc;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	return NULL;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock;
 * it takes the lock itself. */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (strlen(probe_ctx->trid.trstring) == 0) {
		/* If user didn't provide trstring, derive it from trtype */
		spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	}

	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u (%s) not available\n",
			    probe_ctx->trid.trtype, probe_ctx->trid.trstring);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 * that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}
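/*
 * Usage sketch (application side, not part of the driver): enumerating and
 * attaching local PCIe controllers with the probe API defined below. The
 * callback bodies are placeholders.
 *
 *	static bool
 *	example_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;	// attach to every controller that is found
 *	}
 *
 *	static void
 *	example_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			  struct spdk_nvme_ctrlr *ctrlr,
 *			  const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		// ctrlr is now attached and ready for namespace/qpair setup.
 *	}
 *
 *	if (spdk_nvme_probe(NULL, NULL, example_probe_cb, example_attach_cb, NULL) != 0) {
 *		// probe failed
 *	}
 */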
static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 * but maintain the value of rc to signal errors when we return.
	 */
	return nvme_init_controllers(probe_ctx);
}

static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = opts_user->field; \
	}

#define SET_FIELD_ARRAY(field) \
	if (FIELD_OK(field)) { \
		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
	}

	SET_FIELD(num_io_queues);
	SET_FIELD(use_cmb_sqs);
	SET_FIELD(no_shn_notification);
	SET_FIELD(arb_mechanism);
	SET_FIELD(arbitration_burst);
	SET_FIELD(low_priority_weight);
	SET_FIELD(medium_priority_weight);
	SET_FIELD(high_priority_weight);
	SET_FIELD(keep_alive_timeout_ms);
	SET_FIELD(transport_retry_count);
	SET_FIELD(io_queue_size);
	SET_FIELD_ARRAY(hostnqn);
	SET_FIELD(io_queue_requests);
	SET_FIELD_ARRAY(src_addr);
	SET_FIELD_ARRAY(src_svcid);
	SET_FIELD_ARRAY(host_id);
	SET_FIELD_ARRAY(extended_host_id);
	SET_FIELD(command_set);
	SET_FIELD(admin_timeout_ms);
	SET_FIELD(header_digest);
	SET_FIELD(data_digest);
	SET_FIELD(disable_error_logging);
	SET_FIELD(transport_ack_timeout);
	SET_FIELD(admin_queue_size);
	SET_FIELD(fabrics_connect_timeout_us);
	SET_FIELD(disable_read_ana_log_page);
	SET_FIELD(disable_read_changed_ns_list_log_page);
	SET_FIELD_ARRAY(psk);
	SET_FIELD(tls_psk);
	SET_FIELD(dhchap_key);
	SET_FIELD(dhchap_ctrlr_key);
	SET_FIELD(dhchap_digests);
	SET_FIELD(dhchap_dhgroups);

#undef FIELD_OK
#undef SET_FIELD
#undef SET_FIELD_ARRAY
}
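/*
 * Usage sketch (application side, not part of the driver): overriding a few
 * controller options and connecting to a single controller with spdk_nvme_connect(),
 * which uses nvme_ctrlr_opts_init() above to merge the caller's options with the
 * defaults. The transport ID values are placeholders.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid,
 *		"trtype:TCP adrfam:IPv4 traddr:192.0.2.1 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1");
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 *	if (ctrlr == NULL) {
 *		// connection failed
 *	}
 */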
struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid);

	return ctrlr;
}

void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring;

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
		break;
	default:
		SPDK_ERRLOG("no available transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}

int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int i = 0;

	if (trid == NULL || trstring == NULL) {
		return -EINVAL;
	}

	/* Note: gcc-11 has some false positive -Wstringop-overread warnings with LTO builds if we
	 * use strnlen here. So do the trstring copy manually instead. See GitHub issue #2391.
	 */

	/* Copy the input into trid->trstring, converting it to the canonical uppercase form. */
	while (i < SPDK_NVMF_TRSTRING_MAX_LEN && trstring[i] != 0) {
		trid->trstring[i] = toupper(trstring[i]);
		i++;
	}

	if (trstring[i] != 0) {
		return -EINVAL;
	} else {
		trid->trstring[i] = 0;
		return 0;
	}
}

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else if (strcasecmp(str, "VFIOUSER") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		return "VFIOUSER";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip past the key and its ':' or '=' separator */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}

int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "hostnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case. The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. spdk_nvme_perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace. For this
			 * scenario, just silently ignore this key
			 * rather than reporting it as an unknown key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
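/*
 * Usage sketch (illustrative only): parsing a transport ID string with the
 * function above. The address is a placeholder; keys may be separated from
 * values by ':' or '='.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *
 *	// Local PCIe controller
 *	if (spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:04:00.0") != 0) {
 *		// malformed transport ID string
 *	}
 */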
int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return val_len;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);

		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}
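/*
 * Usage sketch (illustrative only): using the comparison above to detect whether
 * two parsed transport IDs refer to the same controller, e.g. when de-duplicating
 * user-supplied connection strings. trid_a and trid_b are hypothetical,
 * already-parsed transport IDs.
 *
 *	if (spdk_nvme_transport_id_compare(&trid_a, &trid_b) == 0) {
 *		// Same controller; skip the duplicate entry.
 *	}
 */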
int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb);
	rc = nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return 0;
	}

	return -EAGAIN;
}

struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}
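/*
 * Usage sketch (application side, not part of the driver): non-blocking attach
 * with the asynchronous connect API above, polling from the application's own
 * loop instead of busy-waiting in spdk_nvme_connect(). trid and attach_cb are
 * hypothetical caller-side values.
 *
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *	int rc;
 *
 *	probe_ctx = spdk_nvme_connect_async(&trid, NULL, attach_cb);
 *	if (probe_ctx == NULL) {
 *		return -1;
 *	}
 *	do {
 *		rc = spdk_nvme_probe_poll_async(probe_ctx);
 *	} while (rc == -EAGAIN);	// probe_ctx is freed once this returns 0
 */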
int
nvme_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service,
		long int *port)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	if (addr == NULL || service == NULL) {
		SPDK_ERRLOG("addr and service must both be non-NULL\n");
		return -EINVAL;
	}

	*port = spdk_strtol(service, 10);
	if (*port <= 0 || *port >= 65536) {
		SPDK_ERRLOG("Invalid port: %s\n", service);
		return -EINVAL;
	}

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return -(abs(ret));
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = -EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}

int
nvme_get_default_hostnqn(char *buf, int len)
{
	char uuid[SPDK_UUID_STRING_LEN];
	int rc;

	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &g_spdk_nvme_driver->default_extended_host_id);
	rc = snprintf(buf, len, "nqn.2014-08.org.nvmexpress:uuid:%s", uuid);
	if (rc < 0 || rc >= len) {
		return -EINVAL;
	}

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(nvme)