1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2015 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
4  */
5 
6 #include "spdk/config.h"
7 #include "spdk/nvmf_spec.h"
8 #include "spdk/string.h"
9 #include "spdk/env.h"
10 #include "nvme_internal.h"
11 #include "nvme_io_msg.h"
12 
13 #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"
14 
15 struct nvme_driver	*g_spdk_nvme_driver;
16 pid_t			g_spdk_nvme_pid;
17 
18 /* overall timeout of 180 seconds, expressed in milliseconds */
19 static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;
20 
21 /* Per-process attached controller list */
22 static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
23 	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);
24 
25 /* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
26 static bool
27 nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
28 {
29 	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
30 }
31 
32 void
33 nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
34 		     struct spdk_nvme_ctrlr *ctrlr)
35 {
36 	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
37 }
38 
39 static void
40 nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
41 {
42 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
43 	if (nvme_ctrlr_shared(ctrlr)) {
44 		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
45 	} else {
46 		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
47 	}
48 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
49 }
50 
51 static int
52 nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
53 			struct nvme_ctrlr_detach_ctx **_ctx)
54 {
55 	struct nvme_ctrlr_detach_ctx *ctx;
56 	int ref_count;
57 
58 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
59 
60 	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
61 	assert(ref_count > 0);
62 
63 	if (ref_count == 1) {
64 		/* This is the last reference to the controller, so we need to
65 		 * allocate a context to destruct it.
66 		 */
67 		ctx = calloc(1, sizeof(*ctx));
68 		if (ctx == NULL) {
69 			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
70 
71 			return -ENOMEM;
72 		}
73 		ctx->ctrlr = ctrlr;
74 		ctx->cb_fn = nvme_ctrlr_detach_async_finish;
75 
76 		nvme_ctrlr_proc_put_ref(ctrlr);
77 
78 		nvme_io_msg_ctrlr_detach(ctrlr);
79 
80 		nvme_ctrlr_destruct_async(ctrlr, ctx);
81 
82 		*_ctx = ctx;
83 	} else {
84 		nvme_ctrlr_proc_put_ref(ctrlr);
85 	}
86 
87 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
88 
89 	return 0;
90 }
91 
92 static int
93 nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
94 {
95 	int rc;
96 
97 	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
98 	if (rc == -EAGAIN) {
99 		return -EAGAIN;
100 	}
101 
102 	free(ctx);
103 
104 	return rc;
105 }
106 
107 int
108 spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
109 {
110 	struct nvme_ctrlr_detach_ctx *ctx = NULL;
111 	int rc;
112 
113 	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
114 	if (rc != 0) {
115 		return rc;
116 	} else if (ctx == NULL) {
117 		/* ctrlr was detached from the caller process, but it is still
118 		 * attached by at least one other process.
119 		 */
120 		return 0;
121 	}
122 
123 	while (1) {
124 		rc = nvme_ctrlr_detach_poll_async(ctx);
125 		if (rc != -EAGAIN) {
126 			break;
127 		}
128 		nvme_delay(1000);
129 	}
130 
131 	return 0;
132 }
133 
134 int
135 spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
136 		       struct spdk_nvme_detach_ctx **_detach_ctx)
137 {
138 	struct spdk_nvme_detach_ctx *detach_ctx;
139 	struct nvme_ctrlr_detach_ctx *ctx = NULL;
140 	int rc;
141 
142 	if (ctrlr == NULL || _detach_ctx == NULL) {
143 		return -EINVAL;
144 	}
145 
146 	/* Use a context header to poll detachment for multiple controllers.
147 	 * Allocate a new one if the caller has not passed one in yet, or use the passed one otherwise.
148 	 */
149 	detach_ctx = *_detach_ctx;
150 	if (detach_ctx == NULL) {
151 		detach_ctx = calloc(1, sizeof(*detach_ctx));
152 		if (detach_ctx == NULL) {
153 			return -ENOMEM;
154 		}
155 		TAILQ_INIT(&detach_ctx->head);
156 	}
157 
158 	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
159 	if (rc != 0 || ctx == NULL) {
160 		/* If this detach failed, or completed without needing an async context,
161 		 * and the context header is empty, we just allocated the header and must free it.
162 		 */
163 		if (TAILQ_EMPTY(&detach_ctx->head)) {
164 			free(detach_ctx);
165 		}
166 		return rc;
167 	}
168 
169 	/* Append a context for this detachment to the context header. */
170 	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);
171 
172 	*_detach_ctx = detach_ctx;
173 
174 	return 0;
175 }
176 
177 int
178 spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
179 {
180 	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
181 	int rc;
182 
183 	if (detach_ctx == NULL) {
184 		return -EINVAL;
185 	}
186 
187 	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
188 		TAILQ_REMOVE(&detach_ctx->head, ctx, link);
189 
190 		rc = nvme_ctrlr_detach_poll_async(ctx);
191 		if (rc == -EAGAIN) {
192 			/* Still in progress; re-queue ctx.  For any other rc, it was freed by nvme_ctrlr_detach_poll_async(). */
193 			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
194 		}
195 	}
196 
197 	if (!TAILQ_EMPTY(&detach_ctx->head)) {
198 		return -EAGAIN;
199 	}
200 
201 	free(detach_ctx);
202 	return 0;
203 }
204 
205 void
206 spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
207 {
208 	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
209 		;
210 	}
211 }
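
/*
 * Illustrative usage sketch (not compiled here): detaching several controllers
 * through one shared detach context and polling until every detachment has
 * finished.  The ctrlrs array and num_ctrlrs count are hypothetical
 * application-side variables.
 *
 *	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *	size_t i;
 *
 *	for (i = 0; i < num_ctrlrs; i++) {
 *		// Each successful call appends one per-controller context to the
 *		// same context header.
 *		if (spdk_nvme_detach_async(ctrlrs[i], &detach_ctx) != 0) {
 *			SPDK_ERRLOG("Failed to start detaching controller %zu\n", i);
 *		}
 *	}
 *
 *	// detach_ctx remains NULL if no controller actually needed asynchronous
 *	// destruction (e.g. another process still holds a reference).
 *	if (detach_ctx != NULL) {
 *		spdk_nvme_detach_poll(detach_ctx);
 *	}
 */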
212 
213 void
214 nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
215 {
216 	struct nvme_completion_poll_status	*status = arg;
217 
218 	if (status->timed_out) {
219 		/* No routine is waiting for the completion of this request; free the allocated memory. */
220 		spdk_free(status->dma_data);
221 		free(status);
222 		return;
223 	}
224 
225 	/*
226 	 * Copy status into the argument passed by the caller, so that
227 	 *  the caller can check the status to determine whether
228 	 *  the request passed or failed.
229 	 */
230 	memcpy(&status->cpl, cpl, sizeof(*cpl));
231 	status->done = true;
232 }
233 
234 static void
235 dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
236 {
237 }
238 
239 int
240 nvme_wait_for_completion_robust_lock_timeout_poll(struct spdk_nvme_qpair *qpair,
241 		struct nvme_completion_poll_status *status,
242 		pthread_mutex_t *robust_mutex)
243 {
244 	int rc;
245 
246 	if (robust_mutex) {
247 		nvme_robust_mutex_lock(robust_mutex);
248 	}
249 
250 	if (qpair->poll_group) {
251 		rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
252 				dummy_disconnected_qpair_cb);
253 	} else {
254 		rc = spdk_nvme_qpair_process_completions(qpair, 0);
255 	}
256 
257 	if (robust_mutex) {
258 		nvme_robust_mutex_unlock(robust_mutex);
259 	}
260 
261 	if (rc < 0) {
262 		status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
263 		status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
264 		goto error;
265 	}
266 
267 	if (!status->done && status->timeout_tsc && spdk_get_ticks() > status->timeout_tsc) {
268 		goto error;
269 	}
270 
271 	if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
272 		union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
273 		if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
274 			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
275 			status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
276 			goto error;
277 		}
278 	}
279 
280 	if (!status->done) {
281 		return -EAGAIN;
282 	} else if (spdk_nvme_cpl_is_error(&status->cpl)) {
283 		return -EIO;
284 	} else {
285 		return 0;
286 	}
287 error:
288 	/* Either a transport error occurred or we've timed out.  Either way, if the response hasn't
289 	 * been received yet, mark the command as timed out, so the status gets freed when the
290 	 * command is completed or aborted.
291 	 */
292 	if (!status->done) {
293 		status->timed_out = true;
294 	}
295 
296 	return -ECANCELED;
297 }
298 
299 /**
300  * Poll qpair for completions until a command completes.
301  *
302  * \param qpair queue to poll
303  * \param status completion status. The user must fill this structure with zeroes before calling
304  * this function.
305  * \param robust_mutex optional robust mutex to lock while polling the qpair
306  * \param timeout_in_usecs optional timeout in microseconds; 0 means no timeout
307  *
308  * \return 0 if command completed without error,
309  * -EIO if command completed with error,
310  * -ECANCELED if command is not completed due to transport/device error or the timeout expired
311  *
312  *  The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
313  *  and status as the callback argument.
314  */
315 int
316 nvme_wait_for_completion_robust_lock_timeout(
317 	struct spdk_nvme_qpair *qpair,
318 	struct nvme_completion_poll_status *status,
319 	pthread_mutex_t *robust_mutex,
320 	uint64_t timeout_in_usecs)
321 {
322 	int rc;
323 
324 	if (timeout_in_usecs) {
325 		status->timeout_tsc = spdk_get_ticks() + timeout_in_usecs *
326 				      spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
327 	} else {
328 		status->timeout_tsc = 0;
329 	}
330 
331 	status->cpl.status_raw = 0;
332 	do {
333 		rc = nvme_wait_for_completion_robust_lock_timeout_poll(qpair, status, robust_mutex);
334 	} while (rc == -EAGAIN);
335 
336 	return rc;
337 }
338 
339 /**
340  * Poll qpair for completions until a command completes.
341  *
342  * \param qpair queue to poll
343  * \param status completion status. The user must fill this structure with zeroes before calling
344  * this function.
345  * \param robust_mutex optional robust mutex to lock while polling qpair
346  *
347  * \return 0 if command completed without error,
348  * -EIO if command completed with error,
349  * -ECANCELED if command is not completed due to transport/device error
350  *
351  * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
352  * and status as the callback argument.
353  */
354 int
355 nvme_wait_for_completion_robust_lock(
356 	struct spdk_nvme_qpair *qpair,
357 	struct nvme_completion_poll_status *status,
358 	pthread_mutex_t *robust_mutex)
359 {
360 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
361 }
362 
363 int
364 nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
365 			 struct nvme_completion_poll_status *status)
366 {
367 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
368 }
369 
370 /**
371  * Poll qpair for completions until a command completes.
372  *
373  * \param qpair queue to poll
374  * \param status completion status. The user must fill this structure with zeroes before calling
375  * this function.
376  * \param timeout_in_usecs optional timeout in microseconds; 0 means no timeout
377  *
378  * \return 0 if command completed without error,
379  * -EIO if command completed with error,
380  * -ECANCELED if command is not completed due to transport/device error or the timeout expired
381  *
382  * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
383  * and status as the callback argument.
384  */
385 int
386 nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
387 				 struct nvme_completion_poll_status *status,
388 				 uint64_t timeout_in_usecs)
389 {
390 	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
391 }
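
/*
 * Illustrative sketch of the calling convention the helpers above expect,
 * mirroring how other files in this driver use them: a command is submitted
 * with nvme_completion_poll_cb as its callback and a zeroed
 * nvme_completion_poll_status as the callback argument, then the caller waits
 * with one of the nvme_wait_for_completion*() variants.  The opcode here is
 * just a placeholder.
 *
 *	static int
 *	example_admin_command_sync(struct spdk_nvme_ctrlr *ctrlr)
 *	{
 *		struct nvme_completion_poll_status *status;
 *		struct spdk_nvme_cmd cmd = {};
 *		int rc;
 *
 *		status = calloc(1, sizeof(*status));
 *		if (status == NULL) {
 *			return -ENOMEM;
 *		}
 *
 *		cmd.opc = SPDK_NVME_OPC_KEEP_ALIVE;
 *		rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *						   nvme_completion_poll_cb, status);
 *		if (rc != 0) {
 *			free(status);
 *			return rc;
 *		}
 *
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *		// If the wait timed out, nvme_completion_poll_cb() will free status
 *		// when the command finally completes or is aborted.
 *		if (!status->timed_out) {
 *			free(status);
 *		}
 *		return rc;
 *	}
 */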
392 
393 static void
394 nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
395 {
396 	struct nvme_request *req = arg;
397 	spdk_nvme_cmd_cb user_cb_fn;
398 	void *user_cb_arg;
399 	enum spdk_nvme_data_transfer xfer;
400 
401 	if (req->user_buffer && req->payload_size) {
402 		/* Copy back to the user buffer */
403 		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
404 		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
405 		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
406 		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
407 			assert(req->pid == getpid());
408 			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
409 		}
410 	}
411 
412 	user_cb_fn = req->user_cb_fn;
413 	user_cb_arg = req->user_cb_arg;
414 	nvme_cleanup_user_req(req);
415 
416 	/* Call the user's original callback now that the buffer has been copied */
417 	user_cb_fn(user_cb_arg, cpl);
418 
419 }
420 
421 /**
422  * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
423  *
424  * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
425  * where the overhead of a copy is not a problem.
426  */
427 struct nvme_request *
428 nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
429 				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
430 				void *cb_arg, bool host_to_controller)
431 {
432 	struct nvme_request *req;
433 	void *dma_buffer = NULL;
434 
435 	if (buffer && payload_size) {
436 		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
437 					  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
438 		if (!dma_buffer) {
439 			return NULL;
440 		}
441 
442 		if (host_to_controller) {
443 			memcpy(dma_buffer, buffer, payload_size);
444 		}
445 	}
446 
447 	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
448 					   NULL);
449 	if (!req) {
450 		spdk_free(dma_buffer);
451 		return NULL;
452 	}
453 
454 	req->user_cb_fn = cb_fn;
455 	req->user_cb_arg = cb_arg;
456 	req->user_buffer = buffer;
457 	req->cb_arg = req;
458 
459 	return req;
460 }
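
/*
 * Illustrative sketch (hypothetical helper, not part of this file): a
 * non-fast-path admin command wrapper built on nvme_allocate_request_user_copy(),
 * roughly following the pattern of the admin command helpers elsewhere in this
 * driver.  The user's buffer is bounced through a DMA-capable copy, and
 * nvme_user_copy_cmd_complete() copies data back before invoking cb_fn.
 * nvme_ctrlr_submit_admin_request() is the internal admin submission helper
 * used by those wrappers.
 *
 *	static int
 *	example_get_log_page_user_copy(struct spdk_nvme_ctrlr *ctrlr, void *user_buf,
 *				       uint32_t len, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
 *	{
 *		struct nvme_request *req;
 *
 *		req = nvme_allocate_request_user_copy(ctrlr->adminq, user_buf, len,
 *						      cb_fn, cb_arg, false);
 *		if (req == NULL) {
 *			return -ENOMEM;
 *		}
 *
 *		req->cmd.opc = SPDK_NVME_OPC_GET_LOG_PAGE;
 *		// Remaining command fields (log page ID, number of dwords, ...)
 *		// would be filled in here by a real wrapper.
 *
 *		return nvme_ctrlr_submit_admin_request(ctrlr, req);
 *	}
 */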
461 
462 /**
463  * Check if a request has exceeded the controller timeout.
464  *
465  * \param req request to check for timeout.
466  * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
467  * \param active_proc per-process data for the controller associated with req
468  * \param now_tick current time from spdk_get_ticks()
469  * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
470  * 1 if requests newer than req need not be checked.
471  *
472  * The request's timeout callback will be called if needed; the caller is only responsible for
473  * calling this function on each outstanding request.
474  */
475 int
476 nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
477 			   struct spdk_nvme_ctrlr_process *active_proc,
478 			   uint64_t now_tick)
479 {
480 	struct spdk_nvme_qpair *qpair = req->qpair;
481 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
482 	uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
483 				 active_proc->timeout_admin_ticks : active_proc->timeout_io_ticks;
484 
485 	assert(active_proc->timeout_cb_fn != NULL);
486 
487 	if (req->timed_out || req->submit_tick == 0) {
488 		return 0;
489 	}
490 
491 	if (req->pid != g_spdk_nvme_pid) {
492 		return 0;
493 	}
494 
495 	if (nvme_qpair_is_admin_queue(qpair) &&
496 	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
497 		return 0;
498 	}
499 
500 	if (req->submit_tick + timeout_ticks > now_tick) {
501 		return 1;
502 	}
503 
504 	req->timed_out = true;
505 
506 	/*
507 	 * We don't want to expose the admin queue to the user,
508 	 * so when we're timing out admin commands set the
509 	 * qpair to NULL.
510 	 */
511 	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
512 				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
513 				   cid);
514 	return 0;
515 }
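
/*
 * Illustrative sketch of how a transport might drive the check above, under
 * the assumption that it keeps its outstanding requests in submission order in
 * a hypothetical "outstanding" list.  The scan stops as soon as the function
 * returns 1, since every request submitted after that one is newer and cannot
 * have timed out yet.
 *
 *	static void
 *	example_scan_for_timeouts(struct spdk_nvme_qpair *qpair,
 *				  struct spdk_nvme_ctrlr_process *active_proc)
 *	{
 *		struct nvme_request *req;
 *		uint64_t now = spdk_get_ticks();
 *
 *		STAILQ_FOREACH(req, &qpair->outstanding, stailq) {
 *			if (nvme_request_check_timeout(req, req->cmd.cid, active_proc, now)) {
 *				break;
 *			}
 *		}
 *	}
 */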
516 
517 int
518 nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
519 {
520 	int rc = 0;
521 
522 #ifdef __FreeBSD__
523 	pthread_mutex_init(mtx, NULL);
524 #else
525 	pthread_mutexattr_t attr;
526 
527 	if (pthread_mutexattr_init(&attr)) {
528 		return -1;
529 	}
530 	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
531 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
532 	    pthread_mutex_init(mtx, &attr)) {
533 		rc = -1;
534 	}
535 	pthread_mutexattr_destroy(&attr);
536 #endif
537 
538 	return rc;
539 }
540 
541 int
542 nvme_driver_init(void)
543 {
544 	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
545 	int ret = 0;
546 
547 	/* Use a special process-private mutex to ensure the global
548 	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
549 	 * only one thread.  Once that object is established and its
550 	 * mutex is initialized, we can unlock this mutex and use that
551 	 * one instead.
552 	 */
553 	pthread_mutex_lock(&g_init_mutex);
554 
555 	/* Each process needs its own pid. */
556 	g_spdk_nvme_pid = getpid();
557 
558 	/*
559 	 * Only one thread from one process will do this driver init work.
560 	 * The primary process will reserve the shared memory and do the
561 	 *  initialization.
562 	 * A secondary process will look up the existing reserved memory.
563 	 */
564 	if (spdk_process_is_primary()) {
565 		/* The uniquely named memzone has already been reserved. */
566 		if (g_spdk_nvme_driver != NULL) {
567 			pthread_mutex_unlock(&g_init_mutex);
568 			return 0;
569 		} else {
570 			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
571 					     sizeof(struct nvme_driver), SPDK_ENV_NUMA_ID_ANY,
572 					     SPDK_MEMZONE_NO_IOVA_CONTIG);
573 		}
574 
575 		if (g_spdk_nvme_driver == NULL) {
576 			SPDK_ERRLOG("primary process failed to reserve memory\n");
577 			pthread_mutex_unlock(&g_init_mutex);
578 			return -1;
579 		}
580 	} else {
581 		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);
582 
583 		/* The uniquely named memzone was already reserved by the primary process. */
584 		if (g_spdk_nvme_driver != NULL) {
585 			int ms_waited = 0;
586 
587 			/* Wait for the nvme driver to finish initializing. */
588 			while ((g_spdk_nvme_driver->initialized == false) &&
589 			       (ms_waited < g_nvme_driver_timeout_ms)) {
590 				ms_waited++;
591 				nvme_delay(1000); /* delay 1ms */
592 			}
593 			if (g_spdk_nvme_driver->initialized == false) {
594 				SPDK_ERRLOG("timeout waiting for primary process to init\n");
595 				pthread_mutex_unlock(&g_init_mutex);
596 				return -1;
597 			}
598 		} else {
599 			SPDK_ERRLOG("primary process is not started yet\n");
600 			pthread_mutex_unlock(&g_init_mutex);
601 			return -1;
602 		}
603 
604 		pthread_mutex_unlock(&g_init_mutex);
605 		return 0;
606 	}
607 
608 	/*
609 	 * At this moment, only one thread from the primary process will do
610 	 * the g_spdk_nvme_driver initialization
611 	 */
612 	assert(spdk_process_is_primary());
613 
614 	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
615 	if (ret != 0) {
616 		SPDK_ERRLOG("failed to initialize mutex\n");
617 		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
618 		pthread_mutex_unlock(&g_init_mutex);
619 		return ret;
620 	}
621 
622 	/* The lock in the shared g_spdk_nvme_driver object is now ready to
623 	 * be used - so we can unlock the g_init_mutex here.
624 	 */
625 	pthread_mutex_unlock(&g_init_mutex);
626 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
627 
628 	g_spdk_nvme_driver->initialized = false;
629 	g_spdk_nvme_driver->hotplug_fd = spdk_pci_event_listen();
630 	if (g_spdk_nvme_driver->hotplug_fd < 0) {
631 		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
632 	}
633 
634 	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);
635 
636 	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);
637 
638 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
639 
640 	return ret;
641 }
642 
643 /* This function must only be called while holding g_spdk_nvme_driver->lock */
644 int
645 nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
646 		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
647 {
648 	struct spdk_nvme_ctrlr *ctrlr;
649 	struct spdk_nvme_ctrlr_opts opts;
650 
651 	assert(trid != NULL);
652 
653 	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
654 
655 	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
656 		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, opts.hostnqn);
657 		if (ctrlr) {
658 			/* This ctrlr already exists. */
659 
660 			if (ctrlr->is_destructed) {
661 				/* This ctrlr is being destructed asynchronously. */
662 				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
663 					    trid->traddr);
664 				probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, trid, -EBUSY);
665 				return -EBUSY;
666 			}
667 
668 			/* Increase the ref count before calling attach_cb() as the user may
669 			 * call nvme_detach() immediately. */
670 			nvme_ctrlr_proc_get_ref(ctrlr);
671 
672 			if (probe_ctx->attach_cb) {
673 				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
674 				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
675 				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
676 			}
677 			return 0;
678 		}
679 
680 		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
681 		if (ctrlr == NULL) {
682 			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
683 			probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, trid, -ENODEV);
684 			return -1;
685 		}
686 		ctrlr->remove_cb = probe_ctx->remove_cb;
687 		ctrlr->cb_ctx = probe_ctx->cb_ctx;
688 
689 		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
690 		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
691 		return 0;
692 	}
693 
694 	return 1;
695 }
696 
697 static void
698 nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
699 			 struct spdk_nvme_probe_ctx *probe_ctx)
700 {
701 	int rc = 0;
702 	struct nvme_ctrlr_detach_ctx *detach_ctx;
703 
704 	rc = nvme_ctrlr_process_init(ctrlr);
705 
706 	if (rc) {
707 		/* Controller failed to initialize. */
708 		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
709 		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
710 		probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, &ctrlr->trid, rc);
711 		nvme_ctrlr_lock(ctrlr);
712 		nvme_ctrlr_fail(ctrlr, false);
713 		nvme_ctrlr_unlock(ctrlr);
714 
715 		/* allocate a context to detach this controller asynchronously */
716 		detach_ctx = calloc(1, sizeof(*detach_ctx));
717 		if (detach_ctx == NULL) {
718 			SPDK_WARNLOG("Failed to allocate asynchronous detach context. Performing synchronous destruct.\n");
719 			nvme_ctrlr_destruct(ctrlr);
720 			return;
721 		}
722 		detach_ctx->ctrlr = ctrlr;
723 		TAILQ_INSERT_TAIL(&probe_ctx->failed_ctxs.head, detach_ctx, link);
724 		nvme_ctrlr_destruct_async(ctrlr, detach_ctx);
725 		return;
726 	}
727 
728 	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
729 		return;
730 	}
731 
732 	STAILQ_INIT(&ctrlr->io_producers);
733 
734 	/*
735 	 * Controller has been initialized.
736 	 *  Move it to the attached_ctrlrs list.
737 	 */
738 	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
739 
740 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
741 	if (nvme_ctrlr_shared(ctrlr)) {
742 		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
743 	} else {
744 		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
745 	}
746 
747 	/*
748 	 * Increase the ref count before calling attach_cb() as the user may
749 	 * call nvme_detach() immediately.
750 	 */
751 	nvme_ctrlr_proc_get_ref(ctrlr);
752 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
753 
754 	if (probe_ctx->attach_cb) {
755 		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
756 	}
757 }
758 
759 static int
760 nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
761 {
762 	int rc = 0;
763 
764 	while (true) {
765 		rc = spdk_nvme_probe_poll_async(probe_ctx);
766 		if (rc != -EAGAIN) {
767 			return rc;
768 		}
769 	}
770 
771 	return rc;
772 }
773 
774 /* This function must not be called while holding g_spdk_nvme_driver->lock */
775 static struct spdk_nvme_ctrlr *
776 nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
777 {
778 	struct spdk_nvme_ctrlr *ctrlr;
779 
780 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
781 	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid, hostnqn);
782 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
783 
784 	return ctrlr;
785 }
786 
787 /* This function must be called while holding g_spdk_nvme_driver->lock */
788 struct spdk_nvme_ctrlr *
789 nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
790 {
791 	struct spdk_nvme_ctrlr *ctrlr;
792 
793 	/* Search per-process list */
794 	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
795 		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
796 			continue;
797 		}
798 		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
799 			continue;
800 		}
801 		return ctrlr;
802 	}
803 
804 	/* Search multi-process shared list */
805 	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
806 		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) != 0) {
807 			continue;
808 		}
809 		if (hostnqn && strcmp(ctrlr->opts.hostnqn, hostnqn) != 0) {
810 			continue;
811 		}
812 		return ctrlr;
813 	}
814 
815 	return NULL;
816 }
817 
818 /* This function must not be called while holding g_spdk_nvme_driver->lock */
819 static int
820 nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
821 		    bool direct_connect)
822 {
823 	int rc;
824 	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
825 	const struct spdk_nvme_ctrlr_opts *opts = probe_ctx->opts;
826 
827 	if (strlen(probe_ctx->trid.trstring) == 0) {
828 		/* If user didn't provide trstring, derive it from trtype */
829 		spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
830 	}
831 
832 	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
833 		SPDK_ERRLOG("NVMe trtype %u (%s) not available\n",
834 			    probe_ctx->trid.trtype, probe_ctx->trid.trstring);
835 		return -1;
836 	}
837 
838 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
839 
840 	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
841 	if (rc != 0) {
842 		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
843 		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
844 			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
845 			probe_ctx->attach_fail_cb(probe_ctx->cb_ctx, &ctrlr->trid, -EFAULT);
846 			nvme_transport_ctrlr_destruct(ctrlr);
847 		}
848 		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
849 		return -1;
850 	}
851 
852 	/*
853 	 * Probe controllers on the shared_attached_ctrlrs list
854 	 */
855 	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
856 		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
857 			/* Do not attach other ctrlrs if the user specified a valid trid */
858 			if ((strlen(probe_ctx->trid.traddr) != 0) &&
859 			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
860 				continue;
861 			}
862 
863 			if (opts && strcmp(opts->hostnqn, ctrlr->opts.hostnqn) != 0) {
864 				continue;
865 			}
866 
867 			/* Do not attach if we failed to initialize it in this process */
868 			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
869 				continue;
870 			}
871 
872 			nvme_ctrlr_proc_get_ref(ctrlr);
873 
874 			/*
875 			 * Unlock while calling attach_cb() so the user can call other functions
876 			 *  that may take the driver lock, like nvme_detach().
877 			 */
878 			if (probe_ctx->attach_cb) {
879 				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
880 				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
881 				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
882 			}
883 		}
884 	}
885 
886 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
887 
888 	return 0;
889 }
890 
891 static void
892 nvme_dummy_attach_fail_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
893 			  int rc)
894 {
895 	SPDK_ERRLOG("Failed to attach nvme ctrlr: trtype=%s adrfam=%s traddr=%s trsvcid=%s "
896 		    "subnqn=%s, %s\n", spdk_nvme_transport_id_trtype_str(trid->trtype),
897 		    spdk_nvme_transport_id_adrfam_str(trid->adrfam), trid->traddr, trid->trsvcid,
898 		    trid->subnqn, spdk_strerror(-rc));
899 }
900 
901 static void
902 nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
903 		    const struct spdk_nvme_transport_id *trid,
904 		    const struct spdk_nvme_ctrlr_opts *opts,
905 		    void *cb_ctx,
906 		    spdk_nvme_probe_cb probe_cb,
907 		    spdk_nvme_attach_cb attach_cb,
908 		    spdk_nvme_attach_fail_cb attach_fail_cb,
909 		    spdk_nvme_remove_cb remove_cb)
910 {
911 	probe_ctx->trid = *trid;
912 	probe_ctx->opts = opts;
913 	probe_ctx->cb_ctx = cb_ctx;
914 	probe_ctx->probe_cb = probe_cb;
915 	probe_ctx->attach_cb = attach_cb;
916 	if (attach_fail_cb != NULL) {
917 		probe_ctx->attach_fail_cb = attach_fail_cb;
918 	} else {
919 		probe_ctx->attach_fail_cb = nvme_dummy_attach_fail_cb;
920 	}
921 	probe_ctx->remove_cb = remove_cb;
922 	TAILQ_INIT(&probe_ctx->init_ctrlrs);
923 	TAILQ_INIT(&probe_ctx->failed_ctxs.head);
924 }
925 
926 int
927 spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
928 		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
929 		spdk_nvme_remove_cb remove_cb)
930 {
931 	return spdk_nvme_probe_ext(trid, cb_ctx, probe_cb, attach_cb, NULL, remove_cb);
932 }
933 
934 int
935 spdk_nvme_probe_ext(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
936 		    spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
937 		    spdk_nvme_attach_fail_cb attach_fail_cb, spdk_nvme_remove_cb remove_cb)
938 {
939 	struct spdk_nvme_transport_id trid_pcie;
940 	struct spdk_nvme_probe_ctx *probe_ctx;
941 
942 	if (trid == NULL) {
943 		memset(&trid_pcie, 0, sizeof(trid_pcie));
944 		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
945 		trid = &trid_pcie;
946 	}
947 
948 	probe_ctx = spdk_nvme_probe_async_ext(trid, cb_ctx, probe_cb,
949 					      attach_cb, attach_fail_cb, remove_cb);
950 	if (!probe_ctx) {
951 		SPDK_ERRLOG("Create probe context failed\n");
952 		return -1;
953 	}
954 
955 	/*
956 	 * Keep going even if one or more nvme_attach() calls failed,
957 	 *  but maintain the value of rc to signal errors when we return.
958 	 */
959 	return nvme_init_controllers(probe_ctx);
960 }
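
/*
 * Illustrative sketch (assumed application-side code): synchronously probing
 * all PCIe controllers with spdk_nvme_probe_ext().  The callback bodies are
 * placeholders; a real application would record the attached controllers
 * rather than just logging.
 *
 *	static bool
 *	example_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		// Returning true asks the driver to attach this controller.
 *		return true;
 *	}
 *
 *	static void
 *	example_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *			  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("Attached %s\n", trid->traddr);
 *	}
 *
 *	static void
 *	example_attach_fail_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, int rc)
 *	{
 *		printf("Failed to attach %s: %d\n", trid->traddr, rc);
 *	}
 *
 *	// Passing a NULL trid probes the default PCIe transport.
 *	int rc = spdk_nvme_probe_ext(NULL, NULL, example_probe_cb, example_attach_cb,
 *				     example_attach_fail_cb, NULL);
 */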
961 
962 static bool
963 nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
964 		      struct spdk_nvme_ctrlr_opts *opts)
965 {
966 	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;
967 
968 	assert(requested_opts);
969 	memcpy(opts, requested_opts, sizeof(*opts));
970 
971 	return true;
972 }
973 
974 static void
975 nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
976 		     const struct spdk_nvme_ctrlr_opts *opts_user,
977 		     size_t opts_size_user)
978 {
979 	assert(opts);
980 	assert(opts_user);
981 
982 	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);
983 
984 #define FIELD_OK(field) \
985 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)
986 
987 #define SET_FIELD(field) \
988 	if (FIELD_OK(field)) { \
989 			opts->field = opts_user->field; \
990 	}
991 
992 #define SET_FIELD_ARRAY(field) \
993 	if (FIELD_OK(field)) { \
994 		memcpy(opts->field, opts_user->field, sizeof(opts_user->field)); \
995 	}
996 
997 	SET_FIELD(num_io_queues);
998 	SET_FIELD(use_cmb_sqs);
999 	SET_FIELD(no_shn_notification);
1000 	SET_FIELD(enable_interrupts);
1001 	SET_FIELD(arb_mechanism);
1002 	SET_FIELD(arbitration_burst);
1003 	SET_FIELD(low_priority_weight);
1004 	SET_FIELD(medium_priority_weight);
1005 	SET_FIELD(high_priority_weight);
1006 	SET_FIELD(keep_alive_timeout_ms);
1007 	SET_FIELD(transport_retry_count);
1008 	SET_FIELD(io_queue_size);
1009 	SET_FIELD_ARRAY(hostnqn);
1010 	SET_FIELD(io_queue_requests);
1011 	SET_FIELD_ARRAY(src_addr);
1012 	SET_FIELD_ARRAY(src_svcid);
1013 	SET_FIELD_ARRAY(host_id);
1014 	SET_FIELD_ARRAY(extended_host_id);
1015 	SET_FIELD(command_set);
1016 	SET_FIELD(admin_timeout_ms);
1017 	SET_FIELD(header_digest);
1018 	SET_FIELD(data_digest);
1019 	SET_FIELD(disable_error_logging);
1020 	SET_FIELD(transport_ack_timeout);
1021 	SET_FIELD(admin_queue_size);
1022 	SET_FIELD(fabrics_connect_timeout_us);
1023 	SET_FIELD(disable_read_ana_log_page);
1024 	SET_FIELD(disable_read_changed_ns_list_log_page);
1025 	SET_FIELD(tls_psk);
1026 	SET_FIELD(dhchap_key);
1027 	SET_FIELD(dhchap_ctrlr_key);
1028 	SET_FIELD(dhchap_digests);
1029 	SET_FIELD(dhchap_dhgroups);
1030 
1031 #undef FIELD_OK
1032 #undef SET_FIELD
1033 #undef SET_FIELD_ARRAY
1034 }
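
/*
 * Illustrative note on the FIELD_OK()/SET_FIELD() pattern above: only fields
 * that fit within the opts_size the caller declared are copied, which lets an
 * application built against an older, smaller struct spdk_nvme_ctrlr_opts keep
 * working.  A sketch of the caller side:
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	// Defaults are filled for the size the caller passes in, and
 *	// opts.opts_size records that size for later FIELD_OK() checks.
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *
 *	// Fields located beyond opts.opts_size are left at their defaults
 *	// when nvme_ctrlr_opts_init() copies the user's options.
 */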
1035 
1036 struct spdk_nvme_ctrlr *
1037 spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
1038 		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
1039 {
1040 	int rc;
1041 	struct spdk_nvme_ctrlr *ctrlr = NULL;
1042 	struct spdk_nvme_probe_ctx *probe_ctx;
1043 	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
1044 	struct spdk_nvme_ctrlr_opts opts_local;
1045 	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
1046 
1047 	if (trid == NULL) {
1048 		SPDK_ERRLOG("No transport ID specified\n");
1049 		return NULL;
1050 	}
1051 
1052 	rc = nvme_driver_init();
1053 	if (rc != 0) {
1054 		return NULL;
1055 	}
1056 
1057 	nvme_get_default_hostnqn(hostnqn, sizeof(hostnqn));
1058 	if (opts) {
1059 		opts_local_p = &opts_local;
1060 		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
1061 		memcpy(hostnqn, opts_local.hostnqn, sizeof(hostnqn));
1062 	}
1063 
1064 	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
1065 	if (!probe_ctx) {
1066 		SPDK_ERRLOG("Create probe context failed\n");
1067 		return NULL;
1068 	}
1069 
1070 	rc = nvme_init_controllers(probe_ctx);
1071 	if (rc != 0) {
1072 		return NULL;
1073 	}
1074 
1075 	ctrlr = nvme_get_ctrlr_by_trid(trid, hostnqn);
1076 
1077 	return ctrlr;
1078 }
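
/*
 * Illustrative sketch (assumed application-side code): connecting to a single
 * NVMe-oF/TCP subsystem with spdk_nvme_connect().  The address, service ID and
 * subsystem NQN below are placeholders.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 10 * 1000;
 *
 *	if (spdk_nvme_transport_id_parse(&trid,
 *			"trtype:TCP adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 "
 *			"subnqn:nqn.2016-06.io.spdk:cnode1") != 0) {
 *		return;
 *	}
 *
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 *	if (ctrlr == NULL) {
 *		// Connection failed; see the log for details.
 *	}
 */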
1079 
1080 void
1081 spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
1082 				  enum spdk_nvme_transport_type trtype)
1083 {
1084 	const char *trstring;
1085 
1086 	trid->trtype = trtype;
1087 	switch (trtype) {
1088 	case SPDK_NVME_TRANSPORT_FC:
1089 		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
1090 		break;
1091 	case SPDK_NVME_TRANSPORT_PCIE:
1092 		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
1093 		break;
1094 	case SPDK_NVME_TRANSPORT_RDMA:
1095 		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
1096 		break;
1097 	case SPDK_NVME_TRANSPORT_TCP:
1098 		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
1099 		break;
1100 	case SPDK_NVME_TRANSPORT_VFIOUSER:
1101 		trstring = SPDK_NVME_TRANSPORT_NAME_VFIOUSER;
1102 		break;
1103 	case SPDK_NVME_TRANSPORT_CUSTOM:
1104 		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
1105 		break;
1106 	default:
1107 		SPDK_ERRLOG("no available transports\n");
1108 		assert(0);
1109 		return;
1110 	}
1111 	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
1112 }
1113 
1114 int
1115 spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
1116 {
1117 	int i = 0;
1118 
1119 	if (trid == NULL || trstring == NULL) {
1120 		return -EINVAL;
1121 	}
1122 
1123 	/* Note: gcc-11 has some false positive -Wstringop-overread warnings with LTO builds if we
1124 	 * use strnlen here.  So do the trstring copy manually instead.  See GitHub issue #2391.
1125 	 */
1126 
1127 	/* Set the official trstring to an uppercase version of the input. */
1128 	while (i < SPDK_NVMF_TRSTRING_MAX_LEN && trstring[i] != 0) {
1129 		trid->trstring[i] = toupper(trstring[i]);
1130 		i++;
1131 	}
1132 
1133 	if (trstring[i] != 0) {
1134 		return -EINVAL;
1135 	} else {
1136 		trid->trstring[i] = 0;
1137 		return 0;
1138 	}
1139 }
1140 
1141 int
1142 spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
1143 {
1144 	if (trtype == NULL || str == NULL) {
1145 		return -EINVAL;
1146 	}
1147 
1148 	if (strcasecmp(str, "PCIe") == 0) {
1149 		*trtype = SPDK_NVME_TRANSPORT_PCIE;
1150 	} else if (strcasecmp(str, "RDMA") == 0) {
1151 		*trtype = SPDK_NVME_TRANSPORT_RDMA;
1152 	} else if (strcasecmp(str, "FC") == 0) {
1153 		*trtype = SPDK_NVME_TRANSPORT_FC;
1154 	} else if (strcasecmp(str, "TCP") == 0) {
1155 		*trtype = SPDK_NVME_TRANSPORT_TCP;
1156 	} else if (strcasecmp(str, "VFIOUSER") == 0) {
1157 		*trtype = SPDK_NVME_TRANSPORT_VFIOUSER;
1158 	} else {
1159 		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
1160 	}
1161 	return 0;
1162 }
1163 
1164 const char *
1165 spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
1166 {
1167 	switch (trtype) {
1168 	case SPDK_NVME_TRANSPORT_PCIE:
1169 		return "PCIe";
1170 	case SPDK_NVME_TRANSPORT_RDMA:
1171 		return "RDMA";
1172 	case SPDK_NVME_TRANSPORT_FC:
1173 		return "FC";
1174 	case SPDK_NVME_TRANSPORT_TCP:
1175 		return "TCP";
1176 	case SPDK_NVME_TRANSPORT_VFIOUSER:
1177 		return "VFIOUSER";
1178 	case SPDK_NVME_TRANSPORT_CUSTOM:
1179 		return "CUSTOM";
1180 	default:
1181 		return NULL;
1182 	}
1183 }
1184 
1185 int
1186 spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
1187 {
1188 	if (adrfam == NULL || str == NULL) {
1189 		return -EINVAL;
1190 	}
1191 
1192 	if (strcasecmp(str, "IPv4") == 0) {
1193 		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
1194 	} else if (strcasecmp(str, "IPv6") == 0) {
1195 		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
1196 	} else if (strcasecmp(str, "IB") == 0) {
1197 		*adrfam = SPDK_NVMF_ADRFAM_IB;
1198 	} else if (strcasecmp(str, "FC") == 0) {
1199 		*adrfam = SPDK_NVMF_ADRFAM_FC;
1200 	} else {
1201 		return -ENOENT;
1202 	}
1203 	return 0;
1204 }
1205 
1206 const char *
1207 spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
1208 {
1209 	switch (adrfam) {
1210 	case SPDK_NVMF_ADRFAM_IPV4:
1211 		return "IPv4";
1212 	case SPDK_NVMF_ADRFAM_IPV6:
1213 		return "IPv6";
1214 	case SPDK_NVMF_ADRFAM_IB:
1215 		return "IB";
1216 	case SPDK_NVMF_ADRFAM_FC:
1217 		return "FC";
1218 	default:
1219 		return NULL;
1220 	}
1221 }
1222 
1223 static size_t
1224 parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
1225 {
1226 
1227 	const char *sep, *sep1;
1228 	const char *whitespace = " \t\n";
1229 	size_t key_len, val_len;
1230 
1231 	*str += strspn(*str, whitespace);
1232 
1233 	sep = strchr(*str, ':');
1234 	if (!sep) {
1235 		sep = strchr(*str, '=');
1236 		if (!sep) {
1237 			SPDK_ERRLOG("Key without ':' or '=' separator\n");
1238 			return 0;
1239 		}
1240 	} else {
1241 		sep1 = strchr(*str, '=');
1242 		if ((sep1 != NULL) && (sep1 < sep)) {
1243 			sep = sep1;
1244 		}
1245 	}
1246 
1247 	key_len = sep - *str;
1248 	if (key_len >= key_buf_size) {
1249 		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
1250 			    key_len, key_buf_size - 1);
1251 		return 0;
1252 	}
1253 
1254 	memcpy(key, *str, key_len);
1255 	key[key_len] = '\0';
1256 
1257 	*str += key_len + 1; /* Skip key: */
1258 	val_len = strcspn(*str, whitespace);
1259 	if (val_len == 0) {
1260 		SPDK_ERRLOG("Key without value\n");
1261 		return 0;
1262 	}
1263 
1264 	if (val_len >= val_buf_size) {
1265 		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
1266 			    val_len, val_buf_size - 1);
1267 		return 0;
1268 	}
1269 
1270 	memcpy(val, *str, val_len);
1271 	val[val_len] = '\0';
1272 
1273 	*str += val_len;
1274 
1275 	return val_len;
1276 }
1277 
1278 int
1279 spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
1280 {
1281 	size_t val_len;
1282 	char key[32];
1283 	char val[1024];
1284 
1285 	if (trid == NULL || str == NULL) {
1286 		return -EINVAL;
1287 	}
1288 
1289 	while (*str != '\0') {
1290 
1291 		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
1292 
1293 		if (val_len == 0) {
1294 			SPDK_ERRLOG("Failed to parse transport ID\n");
1295 			return -EINVAL;
1296 		}
1297 
1298 		if (strcasecmp(key, "trtype") == 0) {
1299 			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
1300 				SPDK_ERRLOG("invalid transport '%s'\n", val);
1301 				return -EINVAL;
1302 			}
1303 			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
1304 				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
1305 				return -EINVAL;
1306 			}
1307 		} else if (strcasecmp(key, "adrfam") == 0) {
1308 			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
1309 				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
1310 				return -EINVAL;
1311 			}
1312 		} else if (strcasecmp(key, "traddr") == 0) {
1313 			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
1314 				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
1315 					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
1316 				return -EINVAL;
1317 			}
1318 			memcpy(trid->traddr, val, val_len + 1);
1319 		} else if (strcasecmp(key, "trsvcid") == 0) {
1320 			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
1321 				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
1322 					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
1323 				return -EINVAL;
1324 			}
1325 			memcpy(trid->trsvcid, val, val_len + 1);
1326 		} else if (strcasecmp(key, "priority") == 0) {
1327 			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
1328 				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
1329 					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
1330 				return -EINVAL;
1331 			}
1332 			trid->priority = spdk_strtol(val, 10);
1333 		} else if (strcasecmp(key, "subnqn") == 0) {
1334 			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
1335 				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
1336 					    val_len, SPDK_NVMF_NQN_MAX_LEN);
1337 				return -EINVAL;
1338 			}
1339 			memcpy(trid->subnqn, val, val_len + 1);
1340 		} else if (strcasecmp(key, "hostaddr") == 0) {
1341 			continue;
1342 		} else if (strcasecmp(key, "hostsvcid") == 0) {
1343 			continue;
1344 		} else if (strcasecmp(key, "hostnqn") == 0) {
1345 			continue;
1346 		} else if (strcasecmp(key, "ns") == 0) {
1347 			/*
1348 			 * Special case.  The namespace id parameter may
1349 			 * optionally be passed in the transport id string
1350 			 * for an SPDK application (e.g. spdk_nvme_perf)
1351 			 * and additionally parsed therein to limit
1352 			 * targeting a specific namespace.  For this
1353 			 * scenario, just silently ignore this key
1354 			 * rather than letting it fall through to the
1355 			 * default case that logs it as an invalid key.
1356 			 */
1357 			continue;
1358 		} else if (strcasecmp(key, "alt_traddr") == 0) {
1359 			/*
1360 			 * Used by applications for enabling transport ID failover.
1361 			 * Please see the case above for more information on custom parameters.
1362 			 */
1363 			continue;
1364 		} else {
1365 			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
1366 		}
1367 	}
1368 
1369 	return 0;
1370 }
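
/*
 * Illustrative sketch of the string format accepted above: whitespace-separated
 * key:value (or key=value) pairs.  The address below is a placeholder.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *
 *	// PCIe transport IDs only need trtype and traddr.
 *	if (spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:04:00.0") == 0) {
 *		assert(trid.trtype == SPDK_NVME_TRANSPORT_PCIE);
 *	}
 */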
1371 
1372 int
1373 spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
1374 {
1375 
1376 	size_t key_size = 32;
1377 	size_t val_size = 1024;
1378 	size_t val_len;
1379 	char key[key_size];
1380 	char val[val_size];
1381 
1382 	if (hostid == NULL || str == NULL) {
1383 		return -EINVAL;
1384 	}
1385 
1386 	while (*str != '\0') {
1387 
1388 		val_len = parse_next_key(&str, key, val, key_size, val_size);
1389 
1390 		if (val_len == 0) {
1391 			SPDK_ERRLOG("Failed to parse host ID\n");
1392 			return -EINVAL;
1393 		}
1394 
1395 		/* Ignore the rest of the options from the transport ID. */
1396 		if (strcasecmp(key, "trtype") == 0) {
1397 			continue;
1398 		} else if (strcasecmp(key, "adrfam") == 0) {
1399 			continue;
1400 		} else if (strcasecmp(key, "traddr") == 0) {
1401 			continue;
1402 		} else if (strcasecmp(key, "trsvcid") == 0) {
1403 			continue;
1404 		} else if (strcasecmp(key, "subnqn") == 0) {
1405 			continue;
1406 		} else if (strcasecmp(key, "priority") == 0) {
1407 			continue;
1408 		} else if (strcasecmp(key, "ns") == 0) {
1409 			continue;
1410 		} else if (strcasecmp(key, "hostaddr") == 0) {
1411 			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
1412 				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
1413 					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
1414 				return -EINVAL;
1415 			}
1416 			memcpy(hostid->hostaddr, val, val_len + 1);
1417 
1418 		} else if (strcasecmp(key, "hostsvcid") == 0) {
1419 			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
1420 				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
1421 					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
1422 				return -EINVAL;
1423 			}
1424 			memcpy(hostid->hostsvcid, val, val_len + 1);
1425 		} else {
1426 			SPDK_ERRLOG("Unknown host ID key '%s'\n", key);
1427 		}
1428 	}
1429 
1430 	return 0;
1431 }
1432 
1433 static int
1434 cmp_int(int a, int b)
1435 {
1436 	return a - b;
1437 }
1438 
1439 int
1440 spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
1441 			       const struct spdk_nvme_transport_id *trid2)
1442 {
1443 	int cmp;
1444 
1445 	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
1446 		cmp = strcasecmp(trid1->trstring, trid2->trstring);
1447 	} else {
1448 		cmp = cmp_int(trid1->trtype, trid2->trtype);
1449 	}
1450 
1451 	if (cmp) {
1452 		return cmp;
1453 	}
1454 
1455 	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
1456 		struct spdk_pci_addr pci_addr1 = {};
1457 		struct spdk_pci_addr pci_addr2 = {};
1458 
1459 		/* Normalize PCI addresses before comparing */
1460 		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
1461 		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
1462 			return -1;
1463 		}
1464 
1465 		/* PCIe transport ID only uses trtype and traddr */
1466 		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
1467 	}
1468 
1469 	cmp = strcasecmp(trid1->traddr, trid2->traddr);
1470 	if (cmp) {
1471 		return cmp;
1472 	}
1473 
1474 	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
1475 	if (cmp) {
1476 		return cmp;
1477 	}
1478 
1479 	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
1480 	if (cmp) {
1481 		return cmp;
1482 	}
1483 
1484 	cmp = strcmp(trid1->subnqn, trid2->subnqn);
1485 	if (cmp) {
1486 		return cmp;
1487 	}
1488 
1489 	return 0;
1490 }
1491 
1492 int
1493 spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
1494 {
1495 	size_t val_len;
1496 	char key[32];
1497 	char val[1024];
1498 
1499 	if (prchk_flags == NULL || str == NULL) {
1500 		return -EINVAL;
1501 	}
1502 
1503 	while (*str != '\0') {
1504 		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
1505 
1506 		if (val_len == 0) {
1507 			SPDK_ERRLOG("Failed to parse prchk\n");
1508 			return -EINVAL;
1509 		}
1510 
1511 		if (strcasecmp(key, "prchk") == 0) {
1512 			if (strcasestr(val, "reftag") != NULL) {
1513 				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
1514 			}
1515 			if (strcasestr(val, "guard") != NULL) {
1516 				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
1517 			}
1518 		} else {
1519 			SPDK_ERRLOG("Unknown key '%s'\n", key);
1520 			return -EINVAL;
1521 		}
1522 	}
1523 
1524 	return 0;
1525 }
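
/*
 * Illustrative sketch: the flag string format accepted above, as an
 * application might use it when parsing controller configuration.
 *
 *	uint32_t prchk_flags = 0;
 *	int rc;
 *
 *	rc = spdk_nvme_prchk_flags_parse(&prchk_flags, "prchk:reftag|guard");
 *	// On success (rc == 0), both SPDK_NVME_IO_FLAGS_PRCHK_REFTAG and
 *	// SPDK_NVME_IO_FLAGS_PRCHK_GUARD are now set in prchk_flags.
 */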
1526 
1527 const char *
1528 spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
1529 {
1530 	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
1531 		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
1532 			return "prchk:reftag|guard";
1533 		} else {
1534 			return "prchk:reftag";
1535 		}
1536 	} else {
1537 		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
1538 			return "prchk:guard";
1539 		} else {
1540 			return NULL;
1541 		}
1542 	}
1543 }
1544 
1545 int
1546 spdk_nvme_scan_attached(const struct spdk_nvme_transport_id *trid)
1547 {
1548 	int rc;
1549 	struct spdk_nvme_probe_ctx *probe_ctx;
1550 
1551 	rc = nvme_driver_init();
1552 	if (rc != 0) {
1553 		return rc;
1554 	}
1555 
1556 	probe_ctx = calloc(1, sizeof(*probe_ctx));
1557 	if (!probe_ctx) {
1558 		return -ENOMEM;
1559 	}
1560 
1561 	nvme_probe_ctx_init(probe_ctx, trid, NULL, NULL, NULL, NULL, NULL, NULL);
1562 
1563 	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
1564 	rc = nvme_transport_ctrlr_scan_attached(probe_ctx);
1565 	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
1566 	free(probe_ctx);
1567 
1568 	return rc < 0 ? rc : 0;
1569 }
1570 
1571 struct spdk_nvme_probe_ctx *
1572 spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
1573 		      void *cb_ctx,
1574 		      spdk_nvme_probe_cb probe_cb,
1575 		      spdk_nvme_attach_cb attach_cb,
1576 		      spdk_nvme_remove_cb remove_cb)
1577 {
1578 	return spdk_nvme_probe_async_ext(trid, cb_ctx, probe_cb, attach_cb, NULL, remove_cb);
1579 }
1580 
1581 struct spdk_nvme_probe_ctx *
1582 spdk_nvme_probe_async_ext(const struct spdk_nvme_transport_id *trid,
1583 			  void *cb_ctx,
1584 			  spdk_nvme_probe_cb probe_cb,
1585 			  spdk_nvme_attach_cb attach_cb,
1586 			  spdk_nvme_attach_fail_cb attach_fail_cb,
1587 			  spdk_nvme_remove_cb remove_cb)
1588 {
1589 	int rc;
1590 	struct spdk_nvme_probe_ctx *probe_ctx;
1591 
1592 	rc = nvme_driver_init();
1593 	if (rc != 0) {
1594 		return NULL;
1595 	}
1596 
1597 	probe_ctx = calloc(1, sizeof(*probe_ctx));
1598 	if (!probe_ctx) {
1599 		return NULL;
1600 	}
1601 
1602 	nvme_probe_ctx_init(probe_ctx, trid, NULL, cb_ctx, probe_cb, attach_cb, attach_fail_cb,
1603 			    remove_cb);
1604 	rc = nvme_probe_internal(probe_ctx, false);
1605 	if (rc != 0) {
1606 		free(probe_ctx);
1607 		return NULL;
1608 	}
1609 
1610 	return probe_ctx;
1611 }
1612 
1613 int
1614 spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
1615 {
1616 	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
1617 	struct nvme_ctrlr_detach_ctx *detach_ctx, *detach_ctx_tmp;
1618 	int rc;
1619 
1620 	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1621 		free(probe_ctx);
1622 		return 0;
1623 	}
1624 
1625 	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
1626 		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
1627 	}
1628 
1629 	/* Poll the destruction of controllers that failed to initialize. */
1630 	TAILQ_FOREACH_SAFE(detach_ctx, &probe_ctx->failed_ctxs.head, link, detach_ctx_tmp) {
1631 		rc = nvme_ctrlr_destruct_poll_async(detach_ctx->ctrlr, detach_ctx);
1632 		if (rc == -EAGAIN) {
1633 			continue;
1634 		}
1635 
1636 		if (rc != 0) {
1637 			SPDK_ERRLOG("Failure while polling the controller destruction (rc = %d)\n", rc);
1638 		}
1639 
1640 		TAILQ_REMOVE(&probe_ctx->failed_ctxs.head, detach_ctx, link);
1641 		free(detach_ctx);
1642 	}
1643 
1644 	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs) && TAILQ_EMPTY(&probe_ctx->failed_ctxs.head)) {
1645 		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
1646 		g_spdk_nvme_driver->initialized = true;
1647 		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
1648 		free(probe_ctx);
1649 		return 0;
1650 	}
1651 
1652 	return -EAGAIN;
1653 }
1654 
1655 struct spdk_nvme_probe_ctx *
1656 spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
1657 			const struct spdk_nvme_ctrlr_opts *opts,
1658 			spdk_nvme_attach_cb attach_cb)
1659 {
1660 	int rc;
1661 	spdk_nvme_probe_cb probe_cb = NULL;
1662 	struct spdk_nvme_probe_ctx *probe_ctx;
1663 
1664 	rc = nvme_driver_init();
1665 	if (rc != 0) {
1666 		return NULL;
1667 	}
1668 
1669 	probe_ctx = calloc(1, sizeof(*probe_ctx));
1670 	if (!probe_ctx) {
1671 		return NULL;
1672 	}
1673 
1674 	if (opts) {
1675 		probe_cb = nvme_connect_probe_cb;
1676 	}
1677 
1678 	nvme_probe_ctx_init(probe_ctx, trid, opts, (void *)opts, probe_cb, attach_cb, NULL, NULL);
1679 	rc = nvme_probe_internal(probe_ctx, true);
1680 	if (rc != 0) {
1681 		free(probe_ctx);
1682 		return NULL;
1683 	}
1684 
1685 	return probe_ctx;
1686 }
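
/*
 * Illustrative sketch (assumed application-side code): the asynchronous
 * connect flow.  spdk_nvme_connect_async() returns a probe context that must
 * be polled with spdk_nvme_probe_poll_async() until it stops returning
 * -EAGAIN; a real application would typically poll from a poller rather than
 * a busy loop.  The trid, opts and example_attach_cb names are the
 * hypothetical variables and callback from the earlier sketches.
 *
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *	int rc;
 *
 *	probe_ctx = spdk_nvme_connect_async(&trid, &opts, example_attach_cb);
 *	if (probe_ctx == NULL) {
 *		return;
 *	}
 *
 *	do {
 *		rc = spdk_nvme_probe_poll_async(probe_ctx);
 *	} while (rc == -EAGAIN);
 *	// probe_ctx has been freed once a value other than -EAGAIN is returned.
 */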
1687 
1688 int
1689 nvme_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service,
1690 		long int *port)
1691 {
1692 	struct addrinfo *res;
1693 	struct addrinfo hints;
1694 	int ret;
1695 
1696 	memset(&hints, 0, sizeof(hints));
1697 	hints.ai_family = family;
1698 	hints.ai_socktype = SOCK_STREAM;
1699 	hints.ai_protocol = 0;
1700 
1701 	if (service != NULL) {
1702 		*port = spdk_strtol(service, 10);
1703 		if (*port <= 0 || *port >= 65536) {
1704 			SPDK_ERRLOG("Invalid port: %s\n", service);
1705 			return -EINVAL;
1706 		}
1707 	}
1708 
1709 	ret = getaddrinfo(addr, service, &hints, &res);
1710 	if (ret) {
1711 		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
1712 		return -(abs(ret));
1713 	}
1714 
1715 	if (res->ai_addrlen > sizeof(*sa)) {
1716 		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
1717 		ret = -EINVAL;
1718 	} else {
1719 		memcpy(sa, res->ai_addr, res->ai_addrlen);
1720 	}
1721 
1722 	freeaddrinfo(res);
1723 	return ret;
1724 }
1725 
1726 int
1727 nvme_get_default_hostnqn(char *buf, int len)
1728 {
1729 	char uuid[SPDK_UUID_STRING_LEN];
1730 	int rc;
1731 
1732 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &g_spdk_nvme_driver->default_extended_host_id);
1733 	rc = snprintf(buf, len, "nqn.2014-08.org.nvmexpress:uuid:%s", uuid);
1734 	if (rc < 0 || rc >= len) {
1735 		return -EINVAL;
1736 	}
1737 
1738 	return 0;
1739 }
1740 
1741 SPDK_LOG_REGISTER_COMPONENT(nvme)
1742