xref: /spdk/lib/nvme/nvme.c (revision fecffda6ecf8853b82edccde429b68252f0a62c5)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2015 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
4  */
5 
6 #include "spdk/nvmf_spec.h"
7 #include "spdk/string.h"
8 #include "spdk/env.h"
9 #include "nvme_internal.h"
10 #include "nvme_io_msg.h"
11 
12 #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"
13 
14 struct nvme_driver	*g_spdk_nvme_driver;
15 pid_t			g_spdk_nvme_pid;
16 
17 /* gross timeout of 180 seconds in milliseconds */
18 static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;
19 
20 /* Per-process attached controller list */
21 static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
22 	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);
23 
24 /* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
25 static bool
26 nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
27 {
28 	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
29 }
30 
31 void
32 nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
33 		     struct spdk_nvme_ctrlr *ctrlr)
34 {
35 	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
36 }
37 
38 static void
39 nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
40 {
41 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
42 	if (nvme_ctrlr_shared(ctrlr)) {
43 		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
44 	} else {
45 		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
46 	}
47 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
48 }
49 
50 static int
51 nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
52 			struct nvme_ctrlr_detach_ctx **_ctx)
53 {
54 	struct nvme_ctrlr_detach_ctx *ctx;
55 	int ref_count;
56 
57 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
58 
59 	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
60 	assert(ref_count > 0);
61 
62 	if (ref_count == 1) {
63 		/* This is the last reference to the controller, so we need to
64 		 * allocate a context to destruct it.
65 		 */
66 		ctx = calloc(1, sizeof(*ctx));
67 		if (ctx == NULL) {
68 			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
69 
70 			return -ENOMEM;
71 		}
72 		ctx->ctrlr = ctrlr;
73 		ctx->cb_fn = nvme_ctrlr_detach_async_finish;
74 
75 		nvme_ctrlr_proc_put_ref(ctrlr);
76 
77 		nvme_io_msg_ctrlr_detach(ctrlr);
78 
79 		nvme_ctrlr_destruct_async(ctrlr, ctx);
80 
81 		*_ctx = ctx;
82 	} else {
83 		nvme_ctrlr_proc_put_ref(ctrlr);
84 	}
85 
86 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
87 
88 	return 0;
89 }
90 
91 static int
92 nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
93 {
94 	int rc;
95 
96 	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
97 	if (rc == -EAGAIN) {
98 		return -EAGAIN;
99 	}
100 
101 	free(ctx);
102 
103 	return rc;
104 }
105 
106 int
107 spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
108 {
109 	struct nvme_ctrlr_detach_ctx *ctx = NULL;
110 	int rc;
111 
112 	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
113 	if (rc != 0) {
114 		return rc;
115 	} else if (ctx == NULL) {
116 		/* ctrlr was detached from the caller process, but other processes
117 		 * still have it attached.
118 		 */
119 		return 0;
120 	}
121 
122 	while (1) {
123 		rc = nvme_ctrlr_detach_poll_async(ctx);
124 		if (rc != -EAGAIN) {
125 			break;
126 		}
127 		nvme_delay(1000);
128 	}
129 
130 	return 0;
131 }
132 
133 int
134 spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
135 		       struct spdk_nvme_detach_ctx **_detach_ctx)
136 {
137 	struct spdk_nvme_detach_ctx *detach_ctx;
138 	struct nvme_ctrlr_detach_ctx *ctx = NULL;
139 	int rc;
140 
141 	if (ctrlr == NULL || _detach_ctx == NULL) {
142 		return -EINVAL;
143 	}
144 
145 	/* Use a context header to poll detachment for multiple controllers.
146 	 * Allocate a new one if not allocated yet, or use the passed one otherwise.
147 	 */
148 	detach_ctx = *_detach_ctx;
149 	if (detach_ctx == NULL) {
150 		detach_ctx = calloc(1, sizeof(*detach_ctx));
151 		if (detach_ctx == NULL) {
152 			return -ENOMEM;
153 		}
154 		TAILQ_INIT(&detach_ctx->head);
155 	}
156 
157 	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
158 	if (rc != 0 || ctx == NULL) {
159 		/* If this detach failed, or completed without needing async polling, and the
160 		 * context header is empty, we just allocated the header and must free it here.
161 		 */
162 		if (TAILQ_EMPTY(&detach_ctx->head)) {
163 			free(detach_ctx);
164 		}
165 		return rc;
166 	}
167 
168 	/* Append a context for this detachment to the context header. */
169 	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);
170 
171 	*_detach_ctx = detach_ctx;
172 
173 	return 0;
174 }
175 
176 int
177 spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
178 {
179 	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
180 	int rc;
181 
182 	if (detach_ctx == NULL) {
183 		return -EINVAL;
184 	}
185 
186 	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
187 		TAILQ_REMOVE(&detach_ctx->head, ctx, link);
188 
189 		rc = nvme_ctrlr_detach_poll_async(ctx);
190 		if (rc == -EAGAIN) {
191 			/* If not -EAGAIN, ctx was freed by nvme_ctrlr_detach_poll_async(). */
192 			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
193 		}
194 	}
195 
196 	if (!TAILQ_EMPTY(&detach_ctx->head)) {
197 		return -EAGAIN;
198 	}
199 
200 	free(detach_ctx);
201 	return 0;
202 }
203 
204 void
205 spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
206 {
207 	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
208 		;
209 	}
210 }
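
/*
 * Illustrative usage sketch (not part of this file): detaching several controllers
 * without blocking between them, using the shared detach context described above.
 * The ctrlrs[]/num_ctrlrs variables are hypothetical placeholders for handles the
 * application obtained from its attach_cb().
 *
 *	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *	uint32_t i;
 *	int rc;
 *
 *	for (i = 0; i < num_ctrlrs; i++) {
 *		rc = spdk_nvme_detach_async(ctrlrs[i], &detach_ctx);
 *		if (rc != 0) {
 *			fprintf(stderr, "spdk_nvme_detach_async() failed: %d\n", rc);
 *		}
 *	}
 *
 *	if (detach_ctx != NULL) {
 *		// Blocks until every queued detachment completes and detach_ctx is freed.
 *		spdk_nvme_detach_poll(detach_ctx);
 *	}
 */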
211 
212 void
213 nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
214 {
215 	struct nvme_completion_poll_status	*status = arg;
216 
217 	if (status->timed_out) {
218 		/* There is no routine waiting for the completion of this request; free the allocated memory */
219 		spdk_free(status->dma_data);
220 		free(status);
221 		return;
222 	}
223 
224 	/*
225 	 * Copy status into the argument passed by the caller, so that
226 	 *  the caller can check the status to determine if
227 	 *  the request passed or failed.
228 	 */
229 	memcpy(&status->cpl, cpl, sizeof(*cpl));
230 	status->done = true;
231 }
232 
233 static void
234 dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
235 {
236 }
237 
238 int
239 nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
240 		struct nvme_completion_poll_status *status,
241 		pthread_mutex_t *robust_mutex)
242 {
243 	int rc;
244 
245 	if (robust_mutex) {
246 		nvme_robust_mutex_lock(robust_mutex);
247 	}
248 
249 	if (qpair->poll_group) {
250 		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
251 				dummy_disconnected_qpair_cb);
252 	} else {
253 		rc = spdk_nvme_qpair_process_completions(qpair, 0);
254 	}
255 
256 	if (robust_mutex) {
257 		nvme_robust_mutex_unlock(robust_mutex);
258 	}
259 
260 	if (rc < 0) {
261 		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
262 		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
263 		goto error;
264 	}
265 
266 	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
267 		goto error;
268 	}
269 
270 	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
271 		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
272 		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
273 			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
274 			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
275 			goto error;
276 		}
277 	}
278 
279 	if (!status->done) {
280 		return -EAGAIN;
281 	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
282 		return -EIO;
283 	} else {
284 		return 0;
285 	}
286 error:
287 	/* Either a transport error occurred or we've timed out.  Either way, if the response hasn't
288 	 * been received yet, mark the command as timed out, so the status gets freed when the
289 	 * command is completed or aborted.
290 	 */
291 	if (!status->done) {
292 		status->timed_out = true;
293 	}
294 
295 	return -ECANCELED;
296 }
297 
298 /**
299  * Poll qpair for completions until a command completes.
300  *
301  * \param qpair queue to poll
302  * \param status completion status. The user must fill this structure with zeroes before calling
303  * this function
304  * \param robust_mutex optional robust mutex to lock while polling qpair
305  * \param timeout_in_usecs optional timeout in microseconds (0 means no timeout)
306  *
307  * \return 0 if command completed without error,
308  * -EIO if command completed with error,
309  * -ECANCELED if command is not completed due to transport/device error or time expired
310  *
311  *  The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
312  *  and status as the callback argument.
313  */
314 int
315 nvme_wait_for_completion_robust_lock_timeout(
316 	struct spdk_nvme_qpair *qpair,
317 	struct nvme_completion_poll_status *status,
318 	pthread_mutex_t *robust_mutex,
319 	uint64_t timeout_in_usecs)
320 {
321 	int rc;
322 
323 	if (timeout_in_usecs) {
324 		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
325 				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
326 	} else {
327 		status->timeout_tsc = 0;
328 	}
329 
330 	status->cpl.status_raw = 0;
331 	do {
332 		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
333 	} while (rc == -EAGAIN);
334 
335 	return rc;
336 }
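
/*
 * Minimal sketch of the calling convention this helper expects (illustrative only;
 * submit_admin_cmd() is a hypothetical stand-in for any nvme_ctrlr_cmd_*() helper
 * that takes a completion callback and argument).
 *
 *	struct nvme_completion_poll_status *status;
 *	int rc;
 *
 *	status = calloc(1, sizeof(*status));
 *	if (status == NULL) {
 *		return -ENOMEM;
 *	}
 *
 *	rc = submit_admin_cmd(ctrlr, nvme_completion_poll_cb, status);
 *	if (rc != 0) {
 *		free(status);
 *		return rc;
 *	}
 *
 *	rc = nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status,
 *			&ctrlr->ctrlr_lock, (uint64_t)ctrlr->opts.admin_timeout_ms * 1000);
 *	if (rc != 0) {
 *		// On timeout, nvme_completion_poll_cb() owns and later frees the status.
 *		if (!status->timed_out) {
 *			free(status);
 *		}
 *		return rc;
 *	}
 *	free(status);
 */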
337 
338 /**
339  * Poll qpair for completions until a command completes.
340  *
341  * \param qpair queue to poll
342  * \param status completion status. The user must fill this structure with zeroes before calling
343  * this function
344  * \param robust_mutex optional robust mutex to lock while polling qpair
345  *
346  * \return 0 if command completed without error,
347  * -EIO if command completed with error,
348  * -ECANCELED if command is not completed due to transport/device error
349  *
350  * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
351  * and status as the callback argument.
352  */
353 int
354 nvme_wait_for_completion_robust_lock(
355 	struct spdk_nvme_qpair *qpair,
356 	struct nvme_completion_poll_status *status,
357 	pthread_mutex_t *robust_mutex)
358 {
359 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
360 }
361 
362 int
363 nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
364 			 struct nvme_completion_poll_status *status)
365 {
366 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
367 }
368 
369 /**
370  * Poll qpair for completions until a command completes.
371  *
372  * \param qpair queue to poll
373  * \param status completion status. The user must fill this structure with zeroes before calling
374  * this function
375  * \param timeout_in_usecs optional timeout in microseconds (0 means no timeout)
376  *
377  * \return 0 if command completed without error,
378  * -EIO if command completed with error,
379  * -ECANCELED if command is not completed due to transport/device error or time expired
380  *
381  * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
382  * and status as the callback argument.
383  */
384 int
385 nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
386 				 struct nvme_completion_poll_status *status,
387 				 uint64_t timeout_in_usecs)
388 {
389 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
390 }
391 
392 static void
393 nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
394 {
395 	struct nvme_request *req = arg;
396 	enum spdk_nvme_data_transfer xfer;
397 
398 	if (req->user_buffer && req->payload_size) {
399 		/* Copy back to the user buffer and free the contig buffer */
400 		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
401 		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
402 		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
403 		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
404 			assert(req->pid == getpid());
405 			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
406 		}
407 
408 		spdk_free(req->payload.contig_or_cb_arg);
409 	}
410 
411 	/* Call the user's original callback now that the buffer has been copied */
412 	req->user_cb_fn(req->user_cb_arg, cpl);
413 }
414 
415 /**
416  * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
417  *
418  * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
419  * where the overhead of a copy is not a problem.
420  */
421 struct nvme_request *
422 nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
423 				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
424 				void *cb_arg, bool host_to_controller)
425 {
426 	struct nvme_request *req;
427 	void *dma_buffer = NULL;
428 
429 	if (buffer && payload_size) {
430 		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
431 					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
432 		if (!dma_buffer) {
433 			return NULL;
434 		}
435 
436 		if (host_to_controller) {
437 			memcpy(dma_buffer, buffer, payload_size);
438 		}
439 	}
440 
441 	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
442 					   NULL);
443 	if (!req) {
444 		spdk_free(dma_buffer);
445 		return NULL;
446 	}
447 
448 	req->user_cb_fn = cb_fn;
449 	req->user_cb_arg = cb_arg;
450 	req->user_buffer = buffer;
451 	req->cb_arg = req;
452 
453 	return req;
454 }
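
/*
 * Illustrative sketch of a caller, loosely modeled on how the admin command helpers
 * use copy-based requests (the opcode and fields shown are examples, not a complete
 * Get Log Page implementation; payload/payload_size/cb_fn/cb_arg are assumed
 * parameters of the enclosing helper).
 *
 *	struct nvme_request *req;
 *
 *	req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size,
 *					      cb_fn, cb_arg, false);
 *	if (req == NULL) {
 *		return -ENOMEM;
 *	}
 *
 *	req->cmd.opc = SPDK_NVME_OPC_GET_LOG_PAGE;
 *	req->cmd.nsid = SPDK_NVME_GLOBAL_NS_TAG;
 *
 *	return nvme_ctrlr_submit_admin_request(ctrlr, req);
 */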
455 
456 /**
457  * Check if a request has exceeded the controller timeout.
458  *
459  * \param req request to check for timeout.
460  * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
461  * \param active_proc per-process data for the controller associated with req
462  * \param now_tick current time from spdk_get_ticks()
463  * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
464  * 1 if requests newer than req need not be checked.
465  *
466  * The request's timeout callback will be called if needed; the caller is only responsible for
467  * calling this function on each outstanding request.
468  */
469 int
470 nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
471 			   struct spdk_nvme_ctrlr_process *active_proc,
472 			   uint64_t now_tick)
473 {
474 	struct spdk_nvme_qpair *qpair = req->qpair;
475 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
476 	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
477 				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;
478 
479 	assert(active_proc->timeout_cb_fn != NULL);
480 
481 	if (req->timed_out || req->submit_tick == 0) {
482 		return 0;
483 	}
484 
485 	if (req->pid != g_spdk_nvme_pid) {
486 		return 0;
487 	}
488 
489 	if (nvme_qpair_is_admin_queue(qpair) &&
490 	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
491 		return 0;
492 	}
493 
494 	if (req->submit_tick + timeout_ticks > now_tick) {
495 		return 1;
496 	}
497 
498 	req->timed_out = true;
499 
500 	/*
501 	 * We don't want to expose the admin queue to the user,
502 	 * so when we're timing out admin commands, set the
503 	 * qpair to NULL.
504 	 */
505 	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
506 				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
507 				   cid);
508 	return 0;
509 }
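
/*
 * Illustrative sketch of how a transport's timeout scan is expected to call this
 * helper.  The outstanding_tr list and the tr->req/tr->cid fields are hypothetical;
 * each transport keeps its own tracker structures.  Because trackers are walked in
 * submission order, a return value of 1 means every remaining request is newer and
 * the scan can stop early.
 *
 *	TAILQ_FOREACH(tr, &pqpair->outstanding_tr, tq_list) {
 *		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, now_tick) == 1) {
 *			break;
 *		}
 *	}
 */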
510 
511 int
512 nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
513 {
514 	int rc = 0;
515 
516 #ifdef __FreeBSD__
517 	pthread_mutex_init(mtx, NULL);
518 #else
519 	pthread_mutexattr_t attr;
520 
521 	if (pthread_mutexattr_init(&attr)) {
522 		return -1;
523 	}
524 	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
525 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
526 	    pthread_mutex_init(mtx, &attr)) {
527 		rc = -1;
528 	}
529 	pthread_mutexattr_destroy(&attr);
530 #endif
531 
532 	return rc;
533 }
534 
535 int
536 nvme_driver_init(void)
537 {
538 	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
539 	int ret = 0;
540 	/* Any socket ID */
541 	int socket_id = -1;
542 
543 	/* Use a special process-private mutex to ensure the global
544 	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
545 	 * only one thread.  Once that object is established and its
546 	 * mutex is initialized, we can unlock this mutex and use that
547 	 * one instead.
548 	 */
549 	pthread_mutex_lock(&g_init_mutex);
550 
551 	/* Each process needs its own pid. */
552 	g_spdk_nvme_pid = getpid();
553 
554 	/*
555 	 * Only one thread from one process will do this driver init work.
556 	 * The primary process will reserve the shared memory and do the
557 	 *  initialization.
558 	 * A secondary process will look up the existing reserved memory.
559 	 */
560 	if (spdk_process_is_primary()) {
561 		/* The uniquely named memzone has already been reserved. */
562 		if (g_spdk_nvme_driver != NULL) {
563 			pthread_mutex_unlock(&g_init_mutex);
564 			return 0;
565 		} else {
566 			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
567 					     sizeof(struct nvme_driver), socket_id,
568 					     SPDK_MEMZONE_NO_IOVA_CONTIG);
569 		}
570 
571 		if (g_spdk_nvme_driver == NULL) {
572 			SPDK_ERRLOG("primary process failed to reserve memory\n");
573 			pthread_mutex_unlock(&g_init_mutex);
574 			return -1;
575 		}
576 	} else {
577 		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);
578 
579 		/* The uniquely named memzone was already reserved by the primary process. */
580 		if (g_spdk_nvme_driver != NULL) {
581 			int ms_waited = 0;
582 
583 			/* Wait for the nvme driver to get initialized. */
584 			while ((g_spdk_nvme_driver->initialized == false) &&
585 			       (ms_waited < g_nvme_driver_timeout_ms)) {
586 				ms_waited++;
587 				nvme_delay(1000); /* delay 1ms */
588 			}
589 			if (g_spdk_nvme_driver->initialized == false) {
590 				SPDK_ERRLOG("timeout waiting for primary process to init\n");
591 				pthread_mutex_unlock(&g_init_mutex);
592 				return -1;
593 			}
594 		} else {
595 			SPDK_ERRLOG("primary process is not started yet\n");
596 			pthread_mutex_unlock(&g_init_mutex);
597 			return -1;
598 		}
599 
600 		pthread_mutex_unlock(&g_init_mutex);
601 		return 0;
602 	}
603 
604 	/*
605 	 * At this moment, only one thread from the primary process will do
606 	 * the g_spdk_nvme_driver initialization.
607 	 */
608 	assert(spdk_process_is_primary());
609 
610 	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
611 	if (ret != 0) {
612 		SPDK_ERRLOG("failed to initialize mutex\n");
613 		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
614 		pthread_mutex_unlock(&g_init_mutex);
615 		return ret;
616 	}
617 
618 	/* The lock in the shared g_spdk_nvme_driver object is now ready to
619 	 * be used, so we can unlock the g_init_mutex here.
620 	 */
621 	pthread_mutex_unlock(&g_init_mutex);
622 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
623 
624 	g_spdk_nvme_driver->initialized = false;
625 	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
626 	if (g_spdk_nvme_driver->hotplug_fd < 0) {
627 		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
628 	}
629 
630 	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);
631 
632 	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);
633 
634 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
635 
636 	return ret;
637 }
638 
639 /* This function must only be called while holding g_spdk_nvme_driver->lock */
640 int
641 nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
642 		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
643 {
644 	struct spdk_nvme_ctrlr *ctrlr;
645 	struct spdk_nvme_ctrlr_opts opts;
646 
647 	assert(trid != NULL);
648 
649 	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
650 
651 	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
652 		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
653 		if (ctrlr) {
654 			/* This ctrlr already exists. */
655 
656 			if (ctrlr->is_destructed) {
657 				/* This ctrlr is being destructed asynchronously. */
658 				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
659 					    trid->traddr);
660 				return -EBUSY;
661 			}
662 
663 			/* Increase the ref count before calling attach_cb() as the user may
664 			 * call nvme_detach() immediately. */
665 			nvme_ctrlr_proc_get_ref(ctrlr);
666 
667 			if (probe_ctx->attach_cb) {
668 				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
669 				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
670 				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
671 			}
672 			return 0;
673 		}
674 
675 		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
676 		if (ctrlr == NULL) {
677 			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
678 			return -1;
679 		}
680 		ctrlr->remove_cb = probe_ctx->remove_cb;
681 		ctrlr->cb_ctx = probe_ctx->cb_ctx;
682 
683 		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
684 		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
685 		return 0;
686 	}
687 
688 	return 1;
689 }
690 
691 static void
692 nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
693 			 struct spdk_nvme_probe_ctx *probe_ctx)
694 {
695 	int rc = 0;
696 
697 	rc = nvme_ctrlr_process_init(ctrlr);
698 
699 	if (rc) {
700 		/* Controller failed to initialize. */
701 		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
702 		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
703 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
704 		nvme_ctrlr_fail(ctrlr, false);
705 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
706 		nvme_ctrlr_destruct(ctrlr);
707 		return;
708 	}
709 
710 	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
711 		return;
712 	}
713 
714 	STAILQ_INIT(&ctrlr->io_producers);
715 
716 	/*
717 	 * Controller has been initialized.
718 	 *  Move it to the attached_ctrlrs list.
719 	 */
720 	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
721 
722 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
723 	if (nvme_ctrlr_shared(ctrlr)) {
724 		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
725 	} else {
726 		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
727 	}
728 
729 	/*
730 	 * Increase the ref count before calling attach_cb() as the user may
731 	 * call nvme_detach() immediately.
732 	 */
733 	nvme_ctrlr_proc_get_ref(ctrlr);
734 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
735 
736 	if (probe_ctx->attach_cb) {
737 		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
738 	}
739 }
740 
741 static int
742 nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
743 {
744 	int rc = 0;
745 
746 	while (true) {
747 		rc = spdk_nvme_probe_poll_async(probe_ctx);
748 		if (rc != -EAGAIN) {
749 			return rc;
750 		}
751 	}
752 
753 	return rc;
754 }
755 
756 /* This function must not be called while holding g_spdk_nvme_driver->lock */
757 static struct spdk_nvme_ctrlr *
758 nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
759 {
760 	struct spdk_nvme_ctrlr *ctrlr;
761 
762 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
763 	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
764 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
765 
766 	return ctrlr;
767 }
768 
769 /* This function must be called while holding g_spdk_nvme_driver->lock */
770 struct spdk_nvme_ctrlr *
771 nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
772 {
773 	struct spdk_nvme_ctrlr *ctrlr;
774 
775 	/* Search per-process list */
776 	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
777 		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
778 			return ctrlr;
779 		}
780 	}
781 
782 	/* Search multi-process shared list */
783 	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
784 		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
785 			return ctrlr;
786 		}
787 	}
788 
789 	return NULL;
790 }
791 
792 /* This function must only be called while holding g_spdk_nvme_driver->lock */
793 static int
794 nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
795 		    bool direct_connect)
796 {
797 	int rc;
798 	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
799 
800 	if (strlen(probe_ctx->trid.trstring) == 0) {
801 		/* If user didn't provide trstring, derive it from trtype */
802 		spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
803 	}
804 
805 	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
806 		SPDK_ERRLOG("NVMe trtype %u (%s) not available\n",
807 			    probe_ctx->trid.trtype, probe_ctx->trid.trstring);
808 		return -1;
809 	}
810 
811 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
812 
813 	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
814 	if (rc != 0) {
815 		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
816 		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
817 			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
818 			nvme_transport_ctrlr_destruct(ctrlr);
819 		}
820 		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
821 		return -1;
822 	}
823 
824 	/*
825 	 * Attach to controllers that are already on the shared_attached_ctrlrs list
826 	 */
827 	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
828 		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
829 			/* Do not attach other ctrlrs if the user specified a valid trid */
830 			if ((strlen(probe_ctx->trid.traddr) != 0) &&
831 			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
832 				continue;
833 			}
834 
835 			/* Do not attach if we failed to initialize it in this process */
836 			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
837 				continue;
838 			}
839 
840 			nvme_ctrlr_proc_get_ref(ctrlr);
841 
842 			/*
843 			 * Unlock while calling attach_cb() so the user can call other functions
844 			 *  that may take the driver lock, like nvme_detach().
845 			 */
846 			if (probe_ctx->attach_cb) {
847 				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
848 				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
849 				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
850 			}
851 		}
852 	}
853 
854 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
855 
856 	return 0;
857 }
858 
859 static void
860 nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
861 		    const struct spdk_nvme_transport_id *trid,
862 		    void *cb_ctx,
863 		    spdk_nvme_probe_cb probe_cb,
864 		    spdk_nvme_attach_cb attach_cb,
865 		    spdk_nvme_remove_cb remove_cb)
866 {
867 	probe_ctx->trid = *trid;
868 	probe_ctx->cb_ctx = cb_ctx;
869 	probe_ctx->probe_cb = probe_cb;
870 	probe_ctx->attach_cb = attach_cb;
871 	probe_ctx->remove_cb = remove_cb;
872 	TAILQ_INIT(&probe_ctx->init_ctrlrs);
873 }
874 
875 int
876 spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
877 		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
878 		spdk_nvme_remove_cb remove_cb)
879 {
880 	struct spdk_nvme_transport_id trid_pcie;
881 	struct spdk_nvme_probe_ctx *probe_ctx;
882 
883 	if (trid == NULL) {
884 		memset(&trid_pcie, 0, sizeof(trid_pcie));
885 		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
886 		trid = &trid_pcie;
887 	}
888 
889 	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
890 					  attach_cb, remove_cb);
891 	if (!probe_ctx) {
892 		SPDK_ERRLOG("Create probe context failed\n");
893 		return -1;
894 	}
895 
896 	/*
897 	 * Keep going even if one or more nvme_attach() calls failed;
898 	 *  nvme_init_controllers() reports the overall result when we return.
899 	 */
900 	return nvme_init_controllers(probe_ctx);
901 }
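
/*
 * Illustrative application-side sketch (not part of the driver): enumerating all
 * local PCIe controllers.  probe_cb decides whether to attach to a discovered
 * controller; attach_cb receives the resulting ctrlr handle.
 *
 *	static bool
 *	probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;	// attach to every controller that is found
 *	}
 *
 *	static void
 *	attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("Attached to %s\n", trid->traddr);
 *	}
 *
 *	// Passing a NULL trid probes the local PCIe bus.
 *	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
 *		fprintf(stderr, "spdk_nvme_probe() failed\n");
 *	}
 */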
902 
903 static bool
904 nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
905 		      struct spdk_nvme_ctrlr_opts *opts)
906 {
907 	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;
908 
909 	assert(requested_opts);
910 	memcpy(opts, requested_opts, sizeof(*opts));
911 
912 	return true;
913 }
914 
915 static void
916 nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
917 		     const struct spdk_nvme_ctrlr_opts *opts_user,
918 		     size_t opts_size_user)
919 {
920 	assert(opts);
921 	assert(opts_user);
922 
923 	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);
924 
925 #define FIELD_OK(field) \
926 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)
927 
928 #define SET_FIELD(field) \
929 	if (FIELD_OK(field)) { \
930 			opts->field = opts_user->field; \
931 	}
932 
933 #define SET_FIELD_ARRAY(field) \
934 	if (FIELD_OK(field)) { \
935 		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
936 	}
937 
938 	SET_FIELD(num_io_queues);
939 	SET_FIELD(use_cmb_sqs);
940 	SET_FIELD(no_shn_notification);
941 	SET_FIELD(arb_mechanism);
942 	SET_FIELD(arbitration_burst);
943 	SET_FIELD(low_priority_weight);
944 	SET_FIELD(medium_priority_weight);
945 	SET_FIELD(high_priority_weight);
946 	SET_FIELD(keep_alive_timeout_ms);
947 	SET_FIELD(transport_retry_count);
948 	SET_FIELD(io_queue_size);
949 	SET_FIELD_ARRAY(hostnqn);
950 	SET_FIELD(io_queue_requests);
951 	SET_FIELD_ARRAY(src_addr);
952 	SET_FIELD_ARRAY(src_svcid);
953 	SET_FIELD_ARRAY(host_id);
954 	SET_FIELD_ARRAY(extended_host_id);
955 	SET_FIELD(command_set);
956 	SET_FIELD(admin_timeout_ms);
957 	SET_FIELD(header_digest);
958 	SET_FIELD(data_digest);
959 	SET_FIELD(disable_error_logging);
960 	SET_FIELD(transport_ack_timeout);
961 	SET_FIELD(admin_queue_size);
962 	SET_FIELD(fabrics_connect_timeout_us);
963 	SET_FIELD(disable_read_ana_log_page);
964 	SET_FIELD(disable_read_changed_ns_list_log_page);
965 	SET_FIELD_ARRAY(psk);
966 
967 #undef FIELD_OK
968 #undef SET_FIELD
969 #undef SET_FIELD_ARRAY
970 }
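
/*
 * Illustrative note: the FIELD_OK()/SET_FIELD() pattern above is what lets
 * spdk_nvme_connect() accept an spdk_nvme_ctrlr_opts from an application built
 * against an older, smaller definition of the structure.  A sketch (the override
 * shown is an arbitrary example):
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.io_queue_size = 256;
 *
 *	// Only fields that fit within the opts_size the caller passes are copied;
 *	// anything newer than the caller's struct keeps the defaults set above.
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */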
971 
972 struct spdk_nvme_ctrlr *
973 spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
974 		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
975 {
976 	int rc;
977 	struct spdk_nvme_ctrlr *ctrlr = NULL;
978 	struct spdk_nvme_probe_ctx *probe_ctx;
979 	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
980 	struct spdk_nvme_ctrlr_opts opts_local;
981 
982 	if (trid == NULL) {
983 		SPDK_ERRLOG("No transport ID specified\n");
984 		return NULL;
985 	}
986 
987 	if (opts) {
988 		opts_local_p = &opts_local;
989 		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
990 	}
991 
992 	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
993 	if (!probe_ctx) {
994 		SPDK_ERRLOG("Create probe context failed\n");
995 		return NULL;
996 	}
997 
998 	rc = nvme_init_controllers(probe_ctx);
999 	if (rc != 0) {
1000 		return NULL;
1001 	}
1002 
1003 	ctrlr = nvme_get_ctrlr_by_trid(trid);
1004 
1005 	return ctrlr;
1006 }
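
/*
 * Illustrative sketch: connecting directly to a single controller.  The PCI address
 * and option override shown are examples only.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	if (spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:04:00.0") != 0) {
 *		return -EINVAL;
 *	}
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.admin_timeout_ms = 60 * 1000;	// example override
 *
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 *	if (ctrlr == NULL) {
 *		fprintf(stderr, "spdk_nvme_connect() failed\n");
 *	}
 */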
1007 
1008 void
1009 spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
1010 				  enum spdk_nvme_transport_type trtype)
1011 {
1012 	const char *trstring = "";
1013 
1014 	trid->trtype = trtype;
1015 	switch (trtype) {
1016 	case SPDK_NVME_TRANSPORT_FC:
1017 		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
1018 		break;
1019 	case SPDK_NVME_TRANSPORT_PCIE:
1020 		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
1021 		break;
1022 	case SPDK_NVME_TRANSPORT_RDMA:
1023 		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
1024 		break;
1025 	case SPDK_NVME_TRANSPORT_TCP:
1026 		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
1027 		break;
1028 	case SPDK_NVME_TRANSPORT_VFIOUSER:
1029 		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
1030 		break;
1031 	case SPDK_NVME_TRANSPORT_CUSTOM:
1032 		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
1033 		break;
1034 	default:
1035 		SPDK_ERRLOG("no available transports\n");
1036 		assert(0);
1037 		return;
1038 	}
1039 	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
1040 }
1041 
1042 int
1043 spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
1044 {
1045 	int i = 0;
1046 
1047 	/* Note: gcc-11 has some false positive -Wstringop-overread warnings with LTO builds if we
1048 	 * use strnlen here.  So do the trstring copy manually instead.  See GitHub issue #2391.
1049 	 */
1050 
1051 	/* Set the official trstring to the uppercase version of the input. */
1052 	while (i < SPDK_NVMF_TRSTRING_MAX_LEN && trstring[i] != 0) {
1053 		trid->trstring[i] = toupper(trstring[i]);
1054 		i++;
1055 	}
1056 
1057 	if (trstring[i] != 0) {
1058 		return -EINVAL;
1059 	} else {
1060 		trid->trstring[i] = 0;
1061 		return 0;
1062 	}
1063 }
1064 
1065 int
1066 spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
1067 {
1068 	if (trtype == NULL || str == NULL) {
1069 		return -EINVAL;
1070 	}
1071 
1072 	if (strcasecmp(str, "PCIe") == 0) {
1073 		*trtype = SPDK_NVME_TRANSPORT_PCIE;
1074 	} else if (strcasecmp(str, "RDMA") == 0) {
1075 		*trtype = SPDK_NVME_TRANSPORT_RDMA;
1076 	} else if (strcasecmp(str, "FC") == 0) {
1077 		*trtype = SPDK_NVME_TRANSPORT_FC;
1078 	} else if (strcasecmp(str, "TCP") == 0) {
1079 		*trtype = SPDK_NVME_TRANSPORT_TCP;
1080 	} else if (strcasecmp(str, "VFIOUSER") == 0) {
1081 		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
1082 	} else {
1083 		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
1084 	}
1085 	return 0;
1086 }
1087 
1088 const char *
1089 spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
1090 {
1091 	switch (trtype) {
1092 	case SPDK_NVME_TRANSPORT_PCIE:
1093 		return "PCIe";
1094 	case SPDK_NVME_TRANSPORT_RDMA:
1095 		return "RDMA";
1096 	case SPDK_NVME_TRANSPORT_FC:
1097 		return "FC";
1098 	case SPDK_NVME_TRANSPORT_TCP:
1099 		return "TCP";
1100 	case SPDK_NVME_TRANSPORT_VFIOUSER:
1101 		return "VFIOUSER";
1102 	case SPDK_NVME_TRANSPORT_CUSTOM:
1103 		return "CUSTOM";
1104 	default:
1105 		return NULL;
1106 	}
1107 }
1108 
1109 int
1110 spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
1111 {
1112 	if (adrfam == NULL || str == NULL) {
1113 		return -EINVAL;
1114 	}
1115 
1116 	if (strcasecmp(str, "IPv4") == 0) {
1117 		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
1118 	} else if (strcasecmp(str, "IPv6") == 0) {
1119 		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
1120 	} else if (strcasecmp(str, "IB") == 0) {
1121 		*adrfam = SPDK_NVMF_ADRFAM_IB;
1122 	} else if (strcasecmp(str, "FC") == 0) {
1123 		*adrfam = SPDK_NVMF_ADRFAM_FC;
1124 	} else {
1125 		return -ENOENT;
1126 	}
1127 	return 0;
1128 }
1129 
1130 const char *
1131 spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
1132 {
1133 	switch (adrfam) {
1134 	case SPDK_NVMF_ADRFAM_IPV4:
1135 		return "IPv4";
1136 	case SPDK_NVMF_ADRFAM_IPV6:
1137 		return "IPv6";
1138 	case SPDK_NVMF_ADRFAM_IB:
1139 		return "IB";
1140 	case SPDK_NVMF_ADRFAM_FC:
1141 		return "FC";
1142 	default:
1143 		return NULL;
1144 	}
1145 }
1146 
1147 static size_t
1148 parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
1149 {
1150 
1151 	const char *sep, *sep1;
1152 	const char *whitespace = " \t\n";
1153 	size_t key_len, val_len;
1154 
1155 	*str += strspn(*str, whitespace);
1156 
1157 	sep = strchr(*str, ':');
1158 	if (!sep) {
1159 		sep = strchr(*str, '=');
1160 		if (!sep) {
1161 			SPDK_ERRLOG("Key without ':' or '=' separator\n");
1162 			return 0;
1163 		}
1164 	} else {
1165 		sep1 = strchr(*str, '=');
1166 		if ((sep1 != NULL) && (sep1 < sep)) {
1167 			sep = sep1;
1168 		}
1169 	}
1170 
1171 	key_len = sep - *str;
1172 	if (key_len >= key_buf_size) {
1173 		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
1174 			    key_len, key_buf_size - 1);
1175 		return 0;
1176 	}
1177 
1178 	memcpy(key, *str, key_len);
1179 	key[key_len] = '\0';
1180 
1181 	*str += key_len + 1; /* Skip key: */
1182 	val_len = strcspn(*str, whitespace);
1183 	if (val_len == 0) {
1184 		SPDK_ERRLOG("Key without value\n");
1185 		return 0;
1186 	}
1187 
1188 	if (val_len >= val_buf_size) {
1189 		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
1190 			    val_len, val_buf_size - 1);
1191 		return 0;
1192 	}
1193 
1194 	memcpy(val, *str, val_len);
1195 	val[val_len] = '\0';
1196 
1197 	*str += val_len;
1198 
1199 	return val_len;
1200 }
1201 
1202 int
1203 spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
1204 {
1205 	size_t val_len;
1206 	char key[32];
1207 	char val[1024];
1208 
1209 	if (trid == NULL || str == NULL) {
1210 		return -EINVAL;
1211 	}
1212 
1213 	while (*str != '\0') {
1214 
1215 		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
1216 
1217 		if (val_len == 0) {
1218 			SPDK_ERRLOG("Failed to parse transport ID\n");
1219 			return -EINVAL;
1220 		}
1221 
1222 		if (strcasecmp(key, "trtype") == 0) {
1223 			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
1224 				SPDK_ERRLOG("invalid transport '%s'\n", val);
1225 				return -EINVAL;
1226 			}
1227 			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
1228 				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
1229 				return -EINVAL;
1230 			}
1231 		} else if (strcasecmp(key, "adrfam") == 0) {
1232 			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
1233 				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
1234 				return -EINVAL;
1235 			}
1236 		} else if (strcasecmp(key, "traddr") == 0) {
1237 			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
1238 				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
1239 					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
1240 				return -EINVAL;
1241 			}
1242 			memcpy(trid->traddr, val, val_len + 1);
1243 		} else if (strcasecmp(key, "trsvcid") == 0) {
1244 			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
1245 				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
1246 					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
1247 				return -EINVAL;
1248 			}
1249 			memcpy(trid->trsvcid, val, val_len + 1);
1250 		} else if (strcasecmp(key, "priority") == 0) {
1251 			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
1252 				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
1253 					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
1254 				return -EINVAL;
1255 			}
1256 			trid->priority = spdk_strtol(val, 10);
1257 		} else if (strcasecmp(key, "subnqn") == 0) {
1258 			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
1259 				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
1260 					    val_len, SPDK_NVMF_NQN_MAX_LEN);
1261 				return -EINVAL;
1262 			}
1263 			memcpy(trid->subnqn, val, val_len + 1);
1264 		} else if (strcasecmp(key, "hostaddr") == 0) {
1265 			continue;
1266 		} else if (strcasecmp(key, "hostsvcid") == 0) {
1267 			continue;
1268 		} else if (strcasecmp(key, "hostnqn") == 0) {
1269 			continue;
1270 		} else if (strcasecmp(key, "ns") == 0) {
1271 			/*
1272 			 * Special case.  The namespace id parameter may
1273 			 * optionally be passed in the transport id string
1274 			 * for an SPDK application (e.g. nvme/perf)
1275 			 * and additionally parsed therein to limit
1276 			 * targeting a specific namespace.  For this
1277 			 * scenario, just silently ignore this key
1278 			 * rather than letting it fall through to the
1279 			 * default case and be logged as an invalid key.
1280 			 */
1281 			continue;
1282 		} else if (strcasecmp(key, "alt_traddr") == 0) {
1283 			/*
1284 			 * Used by applications for enabling transport ID failover.
1285 			 * Please see the case above for more information on custom parameters.
1286 			 */
1287 			continue;
1288 		} else {
1289 			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
1290 		}
1291 	}
1292 
1293 	return 0;
1294 }
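
/*
 * Illustrative sketch: parsing a transport ID string in the whitespace-separated
 * key:value form accepted above.  The address and NQN shown are examples.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *
 *	if (spdk_nvme_transport_id_parse(&trid,
 *			"trtype:TCP adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 "
 *			"subnqn:nqn.2016-06.io.spdk:cnode1") != 0) {
 *		fprintf(stderr, "Failed to parse transport ID\n");
 *	}
 */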
1295 
1296 int
1297 spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
1298 {
1299 
1300 	size_t key_size = 32;
1301 	size_t val_size = 1024;
1302 	size_t val_len;
1303 	char key[key_size];
1304 	char val[val_size];
1305 
1306 	if (hostid == NULL || str == NULL) {
1307 		return -EINVAL;
1308 	}
1309 
1310 	while (*str != '\0') {
1311 
1312 		val_len = parse_next_key(&str, key, val, key_size, val_size);
1313 
1314 		if (val_len == 0) {
1315 			SPDK_ERRLOG("Failed to parse host ID\n");
1316 			return -EINVAL;
1317 		}
1318 
1319 		/* Ignore the rest of the options from the transport ID. */
1320 		if (strcasecmp(key, "trtype") == 0) {
1321 			continue;
1322 		} else if (strcasecmp(key, "adrfam") == 0) {
1323 			continue;
1324 		} else if (strcasecmp(key, "traddr") == 0) {
1325 			continue;
1326 		} else if (strcasecmp(key, "trsvcid") == 0) {
1327 			continue;
1328 		} else if (strcasecmp(key, "subnqn") == 0) {
1329 			continue;
1330 		} else if (strcasecmp(key, "priority") == 0) {
1331 			continue;
1332 		} else if (strcasecmp(key, "ns") == 0) {
1333 			continue;
1334 		} else if (strcasecmp(key, "hostaddr") == 0) {
1335 			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
1336 				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
1337 					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
1338 				return -EINVAL;
1339 			}
1340 			memcpy(hostid->hostaddr, val, val_len + 1);
1341 
1342 		} else if (strcasecmp(key, "hostsvcid") == 0) {
1343 			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
1344 				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
1345 					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
1346 				return -EINVAL;
1347 			}
1348 			memcpy(hostid->hostsvcid, val, val_len + 1);
1349 		} else {
1350 			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
1351 		}
1352 	}
1353 
1354 	return 0;
1355 }
1356 
1357 static int
1358 cmp_int(int a, int b)
1359 {
1360 	return a - b;
1361 }
1362 
1363 int
1364 spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
1365 			       const struct spdk_nvme_transport_id *trid2)
1366 {
1367 	int cmp;
1368 
1369 	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
1370 		cmp = strcasecmp(trid1->trstring, trid2->trstring);
1371 	} else {
1372 		cmp = cmp_int(trid1->trtype, trid2->trtype);
1373 	}
1374 
1375 	if (cmp) {
1376 		return cmp;
1377 	}
1378 
1379 	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
1380 		struct spdk_pci_addr pci_addr1 = {};
1381 		struct spdk_pci_addr pci_addr2 = {};
1382 
1383 		/* Normalize PCI addresses before comparing */
1384 		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
1385 		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
1386 			return -1;
1387 		}
1388 
1389 		/* PCIe transport ID only uses trtype and traddr */
1390 		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
1391 	}
1392 
1393 	cmp = strcasecmp(trid1->traddr, trid2->traddr);
1394 	if (cmp) {
1395 		return cmp;
1396 	}
1397 
1398 	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
1399 	if (cmp) {
1400 		return cmp;
1401 	}
1402 
1403 	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
1404 	if (cmp) {
1405 		return cmp;
1406 	}
1407 
1408 	cmp = strcmp(trid1->subnqn, trid2->subnqn);
1409 	if (cmp) {
1410 		return cmp;
1411 	}
1412 
1413 	return 0;
1414 }
1415 
1416 int
1417 spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
1418 {
1419 	size_t val_len;
1420 	char key[32];
1421 	char val[1024];
1422 
1423 	if (prchk_flags == NULL || str == NULL) {
1424 		return -EINVAL;
1425 	}
1426 
1427 	while (*str != '\0') {
1428 		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
1429 
1430 		if (val_len == 0) {
1431 			SPDK_ERRLOG("Failed to parse prchk\n");
1432 			return -EINVAL;
1433 		}
1434 
1435 		if (strcasecmp(key, "prchk") == 0) {
1436 			if (strcasestr(val, "reftag") != NULL) {
1437 				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
1438 			}
1439 			if (strcasestr(val, "guard") != NULL) {
1440 				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
1441 			}
1442 		} else {
1443 			SPDK_ERRLOG("Unknown key '%s'\n", key);
1444 			return -EINVAL;
1445 		}
1446 	}
1447 
1448 	return 0;
1449 }
1450 
1451 const char *
1452 spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
1453 {
1454 	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
1455 		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
1456 			return "prchk:reftag|guard";
1457 		} else {
1458 			return "prchk:reftag";
1459 		}
1460 	} else {
1461 		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
1462 			return "prchk:guard";
1463 		} else {
1464 			return NULL;
1465 		}
1466 	}
1467 }
1468 
1469 struct spdk_nvme_probe_ctx *
1470 spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
1471 		      void *cb_ctx,
1472 		      spdk_nvme_probe_cb probe_cb,
1473 		      spdk_nvme_attach_cb attach_cb,
1474 		      spdk_nvme_remove_cb remove_cb)
1475 {
1476 	int rc;
1477 	struct spdk_nvme_probe_ctx *probe_ctx;
1478 
1479 	rc = nvme_driver_init();
1480 	if (rc != 0) {
1481 		return NULL;
1482 	}
1483 
1484 	probe_ctx = calloc(1, sizeof(*probe_ctx));
1485 	if (!probe_ctx) {
1486 		return NULL;
1487 	}
1488 
1489 	nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb);
1490 	rc = nvme_probe_internal(probe_ctx, false);
1491 	if (rc != 0) {
1492 		free(probe_ctx);
1493 		return NULL;
1494 	}
1495 
1496 	return probe_ctx;
1497 }
1498 
1499 int
1500 spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
1501 {
1502 	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
1503 
1504 	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1505 		free(probe_ctx);
1506 		return 0;
1507 	}
1508 
1509 	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
1510 		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
1511 	}
1512 
1513 	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
1514 		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
1515 		g_spdk_nvme_driver->initialized = true;
1516 		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
1517 		free(probe_ctx);
1518 		return 0;
1519 	}
1520 
1521 	return -EAGAIN;
1522 }
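
/*
 * Illustrative sketch of the asynchronous probe flow: start the probe, then poll
 * until it reports completion.  probe_cb/attach_cb are application callbacks as in
 * the sketch after spdk_nvme_probe() above; a real application would drive the
 * polling from a poller rather than a busy loop.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *
 *	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
 *
 *	probe_ctx = spdk_nvme_probe_async(&trid, NULL, probe_cb, attach_cb, NULL);
 *	if (probe_ctx == NULL) {
 *		return -1;
 *	}
 *
 *	while (spdk_nvme_probe_poll_async(probe_ctx) == -EAGAIN) {
 *		// probe_ctx is freed by spdk_nvme_probe_poll_async() once it returns 0.
 *	}
 */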
1523 
1524 struct spdk_nvme_probe_ctx *
1525 spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
1526 			const struct spdk_nvme_ctrlr_opts *opts,
1527 			spdk_nvme_attach_cb attach_cb)
1528 {
1529 	int rc;
1530 	spdk_nvme_probe_cb probe_cb = NULL;
1531 	struct spdk_nvme_probe_ctx *probe_ctx;
1532 
1533 	rc = nvme_driver_init();
1534 	if (rc != 0) {
1535 		return NULL;
1536 	}
1537 
1538 	probe_ctx = calloc(1, sizeof(*probe_ctx));
1539 	if (!probe_ctx) {
1540 		return NULL;
1541 	}
1542 
1543 	if (opts) {
1544 		probe_cb = nvme_connect_probe_cb;
1545 	}
1546 
1547 	nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
1548 	rc = nvme_probe_internal(probe_ctx, true);
1549 	if (rc != 0) {
1550 		free(probe_ctx);
1551 		return NULL;
1552 	}
1553 
1554 	return probe_ctx;
1555 }
1556 
1557 SPDK_LOG_REGISTER_COMPONENT(nvme)
1558