/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/config.h"
#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "spdk/env.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver *g_spdk_nvme_driver;
pid_t g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

static void
nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
}

static int
nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
			struct nvme_ctrlr_detach_ctx **_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx;
	int ref_count;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
	assert(ref_count > 0);

	if (ref_count == 1) {
		/* This is the last reference to the controller, so we need to
		 * allocate a context to destruct it.
		 */
		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

			return -ENOMEM;
		}
		ctx->ctrlr = ctrlr;
		ctx->cb_fn = nvme_ctrlr_detach_async_finish;

		nvme_ctrlr_proc_put_ref(ctrlr);

		nvme_io_msg_ctrlr_detach(ctrlr);

		nvme_ctrlr_destruct_async(ctrlr, ctx);

		*_ctx = ctx;
	} else {
		nvme_ctrlr_proc_put_ref(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static int
nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

	free(ctx);

	return rc;
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0) {
		return rc;
	} else if (ctx == NULL) {
		/* ctrlr was detached from the caller process, but some other
		 * process is still attached to it.
		 */
		return 0;
	}

	while (1) {
		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc != -EAGAIN) {
			break;
		}
		nvme_delay(1000);
	}

	return 0;
}

int
spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
		       struct spdk_nvme_detach_ctx **_detach_ctx)
{
	struct spdk_nvme_detach_ctx *detach_ctx;
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	if (ctrlr == NULL || _detach_ctx == NULL) {
		return -EINVAL;
	}

	/* Use a context header to poll detachment for multiple controllers.
	 * Allocate a new one if not allocated yet, or use the passed one otherwise.
	 */
	detach_ctx = *_detach_ctx;
	if (detach_ctx == NULL) {
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			return -ENOMEM;
		}
		TAILQ_INIT(&detach_ctx->head);
	}

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0 || ctx == NULL) {
		/* If this detach failed and the context header is empty, it means we just
		 * allocated the header and need to free it before returning.
		 */
		if (TAILQ_EMPTY(&detach_ctx->head)) {
			free(detach_ctx);
		}
		return rc;
	}

	/* Append a context for this detachment to the context header. */
	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);

	*_detach_ctx = detach_ctx;

	return 0;
}

int
spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
	int rc;

	if (detach_ctx == NULL) {
		return -EINVAL;
	}

	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
		TAILQ_REMOVE(&detach_ctx->head, ctx, link);

		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc == -EAGAIN) {
			/* If not -EAGAIN, ctx was freed by nvme_ctrlr_detach_poll_async(). */
			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
		}
	}

	if (!TAILQ_EMPTY(&detach_ctx->head)) {
		return -EAGAIN;
	}

	free(detach_ctx);
	return 0;
}

void
spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
{
	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
		;
	}
}

void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status *status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request, free allocated memory */
		spdk_free(status->dma_data);
		free(status);
		return;
	}

	/*
	 * Copy status into the argument passed by the caller, so that
	 * the caller can check the status to determine if the
	 * request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

int
nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex)
{
	int rc;

	if (robust_mutex) {
		nvme_robust_mutex_lock(robust_mutex);
	}

	if (qpair->poll_group) {
		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
				dummy_disconnected_qpair_cb);
	} else {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
	}

	if (robust_mutex) {
		nvme_robust_mutex_unlock(robust_mutex);
	}

	if (rc < 0) {
		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
		goto error;
	}

	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
		goto error;
	}

	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto error;
		}
	}

	if (!status->done) {
		return -EAGAIN;
	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
		return -EIO;
	} else {
		return 0;
	}
error:
	/* Either transport error occurred or we've timed out. Either way, if the response hasn't
	 * been received yet, mark the command as timed out, so the status gets freed when the
	 * command is completed or aborted.
	 */
	if (!status->done) {
		status->timed_out = true;
	}

	return -ECANCELED;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock_timeout(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex,
	uint64_t timeout_in_usecs)
{
	int rc;

	if (timeout_in_usecs) {
		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
	} else {
		status->timeout_tsc = 0;
	}

	status->cpl.status_raw = 0;
	do {
		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
	} while (rc == -EAGAIN);

	return rc;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
}

int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_usecs)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
}

static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	spdk_nvme_cmd_cb user_cb_fn;
	void *user_cb_arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}
	}

	user_cb_fn = req->user_cb_fn;
	user_cb_arg = req->user_cb_arg;
	nvme_cleanup_user_req(req);

	/* Call the user's original callback now that the buffer has been copied */
	user_cb_fn(user_cb_arg, cpl);

}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}

/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}

int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}

int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread.
	 * Once that object is established and its mutex is initialized,
	 * we can unlock this mutex and use that one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 * initialization.
	 * The secondary process will lookup the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone has already been reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to get initialized. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, opts.hostnqn);
		if (ctrlr) {
			/* This ctrlr already exists. */

			if (ctrlr->is_destructed) {
				/* This ctrlr is being destructed asynchronously. */
				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
					    trid->traddr);
				return -EBUSY;
			}

			/* Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately. */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_ctrlr_lock(ctrlr);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_ctrlr_unlock(ctrlr);
		nvme_ctrlr_destruct(ctrlr);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 * Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}
}

static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}

	return rc;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, hostnqn);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
			continue;
		}
		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
			continue;
		}
		return ctrlr;
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
			continue;
		}
		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
			continue;
		}
		return ctrlr;
	}

	return NULL;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
	const struct spdk_nvme_ctrlr_opts *opts = probe_ctx->opts;

	if (strlen(probe_ctx->trid.trstring) == 0) {
		/* If user didn't provide trstring, derive it from trtype */
		spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	}

	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u (%s) not available\n",
			    probe_ctx->trid.trtype, probe_ctx->trid.trstring);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			if (opts && strcmp(opts->hostnqn, ctrlr->opts.hostnqn) != 0) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 * that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    const struct spdk_nvme_ctrlr_opts *opts,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->opts = opts;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 * but maintain the value of rc to signal errors when we return.
	 */
	return nvme_init_controllers(probe_ctx);
}

static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = opts_user->field; \
	}

#define SET_FIELD_ARRAY(field) \
	if (FIELD_OK(field)) { \
		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
	}

	SET_FIELD(num_io_queues);
	SET_FIELD(use_cmb_sqs);
	SET_FIELD(no_shn_notification);
	SET_FIELD(arb_mechanism);
	SET_FIELD(arbitration_burst);
	SET_FIELD(low_priority_weight);
	SET_FIELD(medium_priority_weight);
	SET_FIELD(high_priority_weight);
	SET_FIELD(keep_alive_timeout_ms);
	SET_FIELD(transport_retry_count);
	SET_FIELD(io_queue_size);
	SET_FIELD_ARRAY(hostnqn);
	SET_FIELD(io_queue_requests);
	SET_FIELD_ARRAY(src_addr);
	SET_FIELD_ARRAY(src_svcid);
	SET_FIELD_ARRAY(host_id);
	SET_FIELD_ARRAY(extended_host_id);
	SET_FIELD(command_set);
	SET_FIELD(admin_timeout_ms);
	SET_FIELD(header_digest);
	SET_FIELD(data_digest);
	SET_FIELD(disable_error_logging);
	SET_FIELD(transport_ack_timeout);
	SET_FIELD(admin_queue_size);
	SET_FIELD(fabrics_connect_timeout_us);
	SET_FIELD(disable_read_ana_log_page);
	SET_FIELD(disable_read_changed_ns_list_log_page);
	SET_FIELD_ARRAY(psk);
	SET_FIELD(tls_psk);
	SET_FIELD(dhchap_key);
	SET_FIELD(dhchap_ctrlr_key);
	SET_FIELD(dhchap_digests);
	SET_FIELD(dhchap_dhgroups);

#undef FIELD_OK
#undef SET_FIELD
#undef SET_FIELD_ARRAY
}

struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;
	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	nvme_get_default_hostnqn(hostnqn, sizeof(hostnqn));
	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
		memcpy(hostnqn, opts_local.hostnqn, sizeof(hostnqn));
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid, hostnqn);

	return ctrlr;
}

void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring;

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
		break;
	default:
		SPDK_ERRLOG("no available transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}

int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int i = 0;

	if (trid == NULL || trstring == NULL) {
		return -EINVAL;
	}

	/* Note: gcc-11 has some false positive -Wstringop-overread warnings with LTO builds if we
	 * use strnlen here. So do the trstring copy manually instead. See GitHub issue #2391.
	 */

	/* cast official trstring to uppercase version of input. */
	while (i < SPDK_NVMF_TRSTRING_MAX_LEN && trstring[i] != 0) {
		trid->trstring[i] = toupper(trstring[i]);
		i++;
	}

	if (trstring[i] != 0) {
		return -EINVAL;
	} else {
		trid->trstring[i] = 0;
		return 0;
	}
}

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else if (strcasecmp(str, "VFIOUSER") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		return "VFIOUSER";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{

	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip key: */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}

int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "hostnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case.  The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. spdk_nvme_perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace.  For this
			 * scenario, just silently ignore this key
			 * rather than letting it default to logging
			 * it as an invalid key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{

	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return val_len;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);

		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}

int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	nvme_probe_ctx_init(probe_ctx, trid, NULL, cb_ctx, probe_cb, attach_cb, remove_cb);
	rc = nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return 0;
	}

	return -EAGAIN;
}

struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, opts, (void *)opts, probe_cb, attach_cb, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
nvme_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service,
		long int *port)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	if (addr == NULL || service == NULL) {
		SPDK_ERRLOG("addr and service must both be non-NULL\n");
		return -EINVAL;
	}

	*port = spdk_strtol(service, 10);
	if (*port <= 0 || *port >= 65536) {
		SPDK_ERRLOG("Invalid port: %s\n", service);
		return -EINVAL;
	}

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return -(abs(ret));
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = -EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}

int
nvme_get_default_hostnqn(char *buf, int len)
{
	char uuid[SPDK_UUID_STRING_LEN];
	int rc;

	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &g_spdk_nvme_driver->default_extended_host_id);
	rc = snprintf(buf, len, "nqn.2014-08.org.nvmexpress:uuid:%s", uuid);
	if (rc < 0 || rc >= len) {
		return -EINVAL;
	}

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(nvme)
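
/*
 * Illustrative usage sketch (not part of this driver): how an application
 * might drive the probe/attach/detach APIs defined above.  The names
 * example_attach_cb and example_probe_and_detach are hypothetical
 * application code, not SPDK APIs, and error handling is abbreviated.
 *
 *	static void
 *	example_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		// The application now owns a reference to ctrlr and must detach it
 *		// later; here it is simply stored through cb_ctx.
 *		*(struct spdk_nvme_ctrlr **)cb_ctx = ctrlr;
 *	}
 *
 *	static int
 *	example_probe_and_detach(void)
 *	{
 *		struct spdk_nvme_ctrlr *ctrlr = NULL;
 *		struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *
 *		// trid == NULL probes the local PCIe bus (see spdk_nvme_probe()).
 *		// probe_cb == NULL means every discovered controller is attached.
 *		if (spdk_nvme_probe(NULL, &ctrlr, NULL, example_attach_cb, NULL) != 0) {
 *			return -1;
 *		}
 *		if (ctrlr == NULL) {
 *			return 0;	// nothing was attached
 *		}
 *
 *		// ... allocate qpairs and submit I/O here ...
 *
 *		// Queue the detachment, then poll until it completes.
 *		if (spdk_nvme_detach_async(ctrlr, &detach_ctx) == 0) {
 *			spdk_nvme_detach_poll(detach_ctx);
 *		}
 *		return 0;
 *	}
 */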
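
/*
 * For reference, spdk_nvme_transport_id_parse() above accepts whitespace-separated
 * key:value (or key=value) pairs.  The strings below are illustrative values only:
 *
 *	"trtype:PCIe traddr:0000:04:00.0"
 *	"trtype:TCP adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1"
 */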