/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "spdk/env.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver	*g_spdk_nvme_driver;
pid_t			g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

static void
nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
}

static int
nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
			struct nvme_ctrlr_detach_ctx **_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx;
	int ref_count;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
	assert(ref_count > 0);

	if (ref_count == 1) {
		/* This is the last reference to the controller, so we need to
		 * allocate a context to destruct it.
		 */
		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

			return -ENOMEM;
		}
		ctx->ctrlr = ctrlr;
		ctx->cb_fn = nvme_ctrlr_detach_async_finish;

		nvme_ctrlr_proc_put_ref(ctrlr);

		nvme_io_msg_ctrlr_detach(ctrlr);

		nvme_ctrlr_destruct_async(ctrlr, ctx);

		*_ctx = ctx;
	} else {
		nvme_ctrlr_proc_put_ref(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static int
nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

	free(ctx);

	return rc;
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0) {
		return rc;
	} else if (ctx == NULL) {
		/* ctrlr was detached from the caller process, but other processes
		 * are still attached to it.
		 */
		return 0;
	}

	while (1) {
		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc != -EAGAIN) {
			break;
		}
		nvme_delay(1000);
	}

	return 0;
}

int
spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
		       struct spdk_nvme_detach_ctx **_detach_ctx)
{
	struct spdk_nvme_detach_ctx *detach_ctx;
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	if (ctrlr == NULL || _detach_ctx == NULL) {
		return -EINVAL;
	}

	/* Use a context header to poll detachment for multiple controllers.
	 * Allocate a new one if none was passed in, otherwise use the passed one.
	 */
	detach_ctx = *_detach_ctx;
	if (detach_ctx == NULL) {
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			return -ENOMEM;
		}
		TAILQ_INIT(&detach_ctx->head);
	}

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0 || ctx == NULL) {
		/* If this detach failed and the context header is empty, it means we just
		 * allocated the header and need to free it before returning.
		 */
		if (TAILQ_EMPTY(&detach_ctx->head)) {
			free(detach_ctx);
		}
		return rc;
	}

	/* Append a context for this detachment to the context header. */
	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);

	*_detach_ctx = detach_ctx;

	return 0;
}

int
spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
	int rc;

	if (detach_ctx == NULL) {
		return -EINVAL;
	}

	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
		TAILQ_REMOVE(&detach_ctx->head, ctx, link);

		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc == -EAGAIN) {
			/* Detachment is still in progress, so re-queue ctx.  For any
			 * other return value, ctx was freed by nvme_ctrlr_detach_poll_async().
			 */
			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
		}
	}

	if (!TAILQ_EMPTY(&detach_ctx->head)) {
		return -EAGAIN;
	}

	free(detach_ctx);
	return 0;
}

void
spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
{
	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
		;
	}
}
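
/*
 * Usage sketch (illustrative, not part of this file): detach several
 * controllers without serializing on each one.  ctrlr1/ctrlr2 stand in for
 * controllers previously attached by this process.
 *
 *	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *
 *	spdk_nvme_detach_async(ctrlr1, &detach_ctx);
 *	spdk_nvme_detach_async(ctrlr2, &detach_ctx);
 *
 *	// Poll until both detachments complete; detach_ctx is freed internally.
 *	spdk_nvme_detach_poll(detach_ctx);
 */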

void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status	*status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request; free the allocated memory. */
		spdk_free(status->dma_data);
		free(status);
		return;
	}

	/*
	 * Copy the completion into the argument passed by the caller, so that
	 *  the caller can check the status to determine whether the
	 *  request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

int
nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex)
{
	int rc;

	if (robust_mutex) {
		nvme_robust_mutex_lock(robust_mutex);
	}

	if (qpair->poll_group) {
		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
				dummy_disconnected_qpair_cb);
	} else {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
	}

	if (robust_mutex) {
		nvme_robust_mutex_unlock(robust_mutex);
	}

	if (rc < 0) {
		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
		goto error;
	}

	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
		goto error;
	}

	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto error;
		}
	}

	if (!status->done) {
		return -EAGAIN;
	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
		return -EIO;
	} else {
		return 0;
	}
error:
	/* Either a transport error occurred or we've timed out.  Either way, if the response hasn't
	 * been received yet, mark the command as timed out, so the status gets freed when the
	 * command is completed or aborted.
	 */
	if (!status->done) {
		status->timed_out = true;
	}

	return -ECANCELED;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function.
 * \param robust_mutex optional robust mutex to lock while polling qpair
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if the command completed without error,
 * -EIO if the command completed with an error,
 * -ECANCELED if the command was not completed due to a transport/device error or because the
 * timeout expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock_timeout(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex,
	uint64_t timeout_in_usecs)
{
	int rc;

	if (timeout_in_usecs) {
		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
	} else {
		status->timeout_tsc = 0;
	}

	status->cpl.status_raw = 0;
	do {
		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
	} while (rc == -EAGAIN);

	return rc;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function.
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if the command completed without error,
 * -EIO if the command completed with an error,
 * -ECANCELED if the command was not completed due to a transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
}

int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function.
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if the command completed without error,
 * -EIO if the command completed with an error,
 * -ECANCELED if the command was not completed due to a transport/device error or because the
 * timeout expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_usecs)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
}
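
/*
 * Usage sketch (illustrative): the expected caller pattern for the wrappers
 * above.  Pair nvme_completion_poll_cb with a zeroed, heap-allocated
 * nvme_completion_poll_status to turn an asynchronous admin command into a
 * synchronous call; ctrlr and cmd are assumed to be set up by the caller.
 *
 *	struct nvme_completion_poll_status *status;
 *	int rc;
 *
 *	status = calloc(1, sizeof(*status));
 *	if (status == NULL) {
 *		return -ENOMEM;
 *	}
 *	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *					   nvme_completion_poll_cb, status);
 *	if (rc == 0) {
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *	}
 *	// On timeout the status is freed later by nvme_completion_poll_cb().
 *	if (!status->timed_out) {
 *		free(status);
 *	}
 */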

static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer and free the contig buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}

		spdk_free(req->payload.contig_or_cb_arg);
	}

	/* Call the user's original callback now that the buffer has been copied */
	req->user_cb_fn(req->user_cb_arg, cpl);
}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}
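
/*
 * Usage sketch (illustrative): wrap a caller-supplied buffer for an admin
 * command that transfers data from the controller to the host, then submit
 * the request.  nvme_ctrlr_submit_admin_request() is the assumed internal
 * submission path.
 *
 *	req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size,
 *					      cb_fn, cb_arg, false);  // false: controller-to-host
 *	if (req == NULL) {
 *		return -ENOMEM;
 *	}
 *	// Fill in req->cmd (opcode, nsid, cdws) here, then:
 *	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
 */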

/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}
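
/*
 * Sketch of the expected caller pattern (assumed, simplified from a
 * transport's tracker bookkeeping; tr and outstanding_tr are illustrative
 * names): walk outstanding requests oldest-first and stop scanning at the
 * first request that is still inside its timeout window.
 *
 *	uint64_t now = spdk_get_ticks();
 *
 *	TAILQ_FOREACH(tr, &pqpair->outstanding_tr, tq_list) {
 *		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, now)) {
 *			// Requests are in submission order; nothing newer can
 *			// have timed out yet.
 *			break;
 *		}
 *	}
 */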

int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}
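
/*
 * Background note (not code from this file): with a robust mutex, if a
 * process dies while holding the lock, the next pthread_mutex_lock() returns
 * EOWNERDEAD and the survivor must mark the mutex consistent again:
 *
 *	rc = pthread_mutex_lock(mtx);
 *	if (rc == EOWNERDEAD) {
 *		// Shared state may be mid-update; repair it if needed, then:
 *		pthread_mutex_consistent(mtx);
 *	}
 *
 * nvme_robust_mutex_lock() is assumed to encapsulate this recovery for the
 * driver's shared lock.
 */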

int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread.  Once that object is established and its
	 * mutex is initialized, we can unlock this mutex and use that
	 * one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 *  initialization.
	 * The secondary process will look up the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone has already been reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to be initialized. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
		if (ctrlr) {
			/* This ctrlr already exists. */

			if (ctrlr->is_destructed) {
				/* This ctrlr is being destructed asynchronously. */
				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
					    trid->traddr);
				return -EBUSY;
			}

			/* Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately.
			 */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_destruct(ctrlr);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 *  Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}
}

static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}

	return rc;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	return NULL;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock,
 * since it takes that lock itself.
 */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u not available\n", probe_ctx->trid.trtype);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 *  that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 *  but maintain the value of rc to signal errors when we return.
	 */
	return nvme_init_controllers(probe_ctx);
}
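
/*
 * Usage sketch (illustrative application code): enumerate and attach all
 * local PCIe controllers.  The callbacks are application-defined.
 *
 *	static bool
 *	probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;  // attach to every controller found
 *	}
 *
 *	static void
 *	attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("Attached to %s\n", trid->traddr);
 *	}
 *
 *	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
 *		fprintf(stderr, "spdk_nvme_probe() failed\n");
 *	}
 */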

static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = opts_user->field; \
	}

#define SET_FIELD_ARRAY(field) \
	if (FIELD_OK(field)) { \
		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
	}

	SET_FIELD(num_io_queues);
	SET_FIELD(use_cmb_sqs);
	SET_FIELD(no_shn_notification);
	SET_FIELD(arb_mechanism);
	SET_FIELD(arbitration_burst);
	SET_FIELD(low_priority_weight);
	SET_FIELD(medium_priority_weight);
	SET_FIELD(high_priority_weight);
	SET_FIELD(keep_alive_timeout_ms);
	SET_FIELD(transport_retry_count);
	SET_FIELD(io_queue_size);
	SET_FIELD_ARRAY(hostnqn);
	SET_FIELD(io_queue_requests);
	SET_FIELD_ARRAY(src_addr);
	SET_FIELD_ARRAY(src_svcid);
	SET_FIELD_ARRAY(host_id);
	SET_FIELD_ARRAY(extended_host_id);
	SET_FIELD(command_set);
	SET_FIELD(admin_timeout_ms);
	SET_FIELD(header_digest);
	SET_FIELD(data_digest);
	SET_FIELD(disable_error_logging);
	SET_FIELD(transport_ack_timeout);
	SET_FIELD(admin_queue_size);
	SET_FIELD(fabrics_connect_timeout_us);
	SET_FIELD(disable_read_ana_log_page);

	/* Do not remove this statement.  When you add a new field, update this
	 * assert with the correct size, and do not forget to add a new SET_FIELD
	 * statement for your newly added field. */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_opts) == 616, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
#undef SET_FIELD_ARRAY
}
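
/*
 * The opts_size handling above preserves ABI compatibility: an application
 * built against an older, smaller struct spdk_nvme_ctrlr_opts has only its
 * known prefix copied, and newer fields keep their defaults.  A caller passes
 * sizeof() of the struct it was compiled against, e.g.:
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */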

struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid);

	return ctrlr;
}
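
/*
 * Usage sketch (illustrative): synchronously connect to a single NVMe-oF
 * target.  The address values are placeholders.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid, "trtype:TCP adrfam:IPv4 "
 *				     "traddr:192.0.2.1 trsvcid:4420 "
 *				     "subnqn:nqn.2016-06.io.spdk:cnode1");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *	if (ctrlr == NULL) {
 *		fprintf(stderr, "failed to connect\n");
 *	}
 */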

void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring = "";

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
		break;
	default:
		SPDK_ERRLOG("no available transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}

int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int len, i, rc;

	if (trstring == NULL) {
		return -EINVAL;
	}

	len = strnlen(trstring, SPDK_NVMF_TRSTRING_MAX_LEN);
	if (len == SPDK_NVMF_TRSTRING_MAX_LEN) {
		return -EINVAL;
	}

	rc = snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
	if (rc < 0) {
		return rc;
	}

	/* Convert the official trstring to the uppercase version of the input. */
	for (i = 0; i < len; i++) {
		trid->trstring[i] = toupper(trid->trstring[i]);
	}
	return 0;
}

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else if (strcasecmp(str, "VFIOUSER") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_VFIOUSER:
		return "VFIOUSER";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip key: */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}
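
/*
 * Example (illustrative): given the input string
 *
 *	"trtype:PCIe traddr=0000:04:00.0"
 *
 * successive calls to parse_next_key() yield ("trtype", "PCIe") and then
 * ("traddr", "0000:04:00.0").  *str is advanced past each consumed pair, and
 * the return value is the value's length, or 0 on a parse error.
 */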

int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "hostnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case.  The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. nvme/perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace.  For this
			 * scenario, just silently ignore this key
			 * rather than letting it default to logging
			 * it as an invalid key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
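
/*
 * Example (illustrative) transport ID strings accepted by the parser above:
 *
 *	"trtype:PCIe traddr:0000:04:00.0"
 *	"trtype:RDMA adrfam:IPv4 traddr:192.0.2.10 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1"
 *
 *	struct spdk_nvme_transport_id trid = {};
 *
 *	if (spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:04:00.0") != 0) {
 *		fprintf(stderr, "invalid transport ID\n");
 *	}
 */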

int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return -EINVAL;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);

		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}

int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}
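
/*
 * Example (illustrative):
 *
 *	uint32_t flags = 0;
 *
 *	spdk_nvme_prchk_flags_parse(&flags, "prchk:reftag|guard");
 *	// flags == (SPDK_NVME_IO_FLAGS_PRCHK_REFTAG | SPDK_NVME_IO_FLAGS_PRCHK_GUARD)
 */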

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb);
	rc = nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return 0;
	}

	return -EAGAIN;
}
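
/*
 * Usage sketch (illustrative): the asynchronous flavor of spdk_nvme_probe(),
 * driven from an application poller instead of blocking.  probe_cb/attach_cb
 * are the same application-defined callbacks shown earlier.
 *
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *
 *	probe_ctx = spdk_nvme_probe_async(NULL, NULL, probe_cb, attach_cb, NULL);
 *	if (probe_ctx == NULL) {
 *		return -1;
 *	}
 *	while (spdk_nvme_probe_poll_async(probe_ctx) == -EAGAIN) {
 *		// Do other work between polls; probe_ctx is freed on completion.
 *	}
 */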

struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

SPDK_LOG_REGISTER_COMPONENT(nvme)