/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_uevent.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver	*g_spdk_nvme_driver;
pid_t			g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

static void
nvme_ctrlr_detach_async_finish(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
}

static int
nvme_ctrlr_detach_async(struct spdk_nvme_ctrlr *ctrlr,
			struct nvme_ctrlr_detach_ctx **_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx;
	int ref_count;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	ref_count = nvme_ctrlr_get_ref_count(ctrlr);
	assert(ref_count > 0);

	if (ref_count == 1) {
		/* This is the last reference to the controller, so we need to
		 * allocate a context to destruct it.
		 */
		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

			return -ENOMEM;
		}
		ctx->ctrlr = ctrlr;
		ctx->cb_fn = nvme_ctrlr_detach_async_finish;

		nvme_ctrlr_proc_put_ref(ctrlr);

		nvme_io_msg_ctrlr_detach(ctrlr);

		nvme_ctrlr_destruct_async(ctrlr, ctx);

		*_ctx = ctx;
	} else {
		nvme_ctrlr_proc_put_ref(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static int
nvme_ctrlr_detach_poll_async(struct nvme_ctrlr_detach_ctx *ctx)
{
	int rc;

	rc = nvme_ctrlr_destruct_poll_async(ctx->ctrlr, ctx);
	if (rc == -EAGAIN) {
		return -EAGAIN;
	}

	free(ctx);

	return rc;
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0) {
		return rc;
	} else if (ctx == NULL) {
		/* ctrlr was detached from the caller process, but other
		 * processes are still attached to it.
		 */
		return 0;
	}

	while (1) {
		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc != -EAGAIN) {
			break;
		}
		nvme_delay(1000);
	}

	return 0;
}
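
/*
 * Illustrative usage sketch (not part of the original file): synchronously
 * detaching a controller obtained from spdk_nvme_connect(). The PCI address
 * below is a made-up example.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:04:00.0");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *	if (ctrlr != NULL) {
 *		... submit and complete I/O ...
 *		spdk_nvme_detach(ctrlr);
 *	}
 */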

int
spdk_nvme_detach_async(struct spdk_nvme_ctrlr *ctrlr,
		       struct spdk_nvme_detach_ctx **_detach_ctx)
{
	struct spdk_nvme_detach_ctx *detach_ctx;
	struct nvme_ctrlr_detach_ctx *ctx = NULL;
	int rc;

	if (ctrlr == NULL || _detach_ctx == NULL) {
		return -EINVAL;
	}

	/* Use a context header to poll detachment for multiple controllers.
	 * Allocate a new one if not allocated yet, otherwise use the one passed in.
	 */
	detach_ctx = *_detach_ctx;
	if (detach_ctx == NULL) {
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			return -ENOMEM;
		}
		TAILQ_INIT(&detach_ctx->head);
	} else if (detach_ctx->polling_started) {
		SPDK_ERRLOG("Detachment polling has already started.\n");
		return -EBUSY;
	}

	rc = nvme_ctrlr_detach_async(ctrlr, &ctx);
	if (rc != 0 || ctx == NULL) {
		/* If this detach failed and the context header is empty, it means we just
		 * allocated the header and need to free it before returning.
		 */
		if (TAILQ_EMPTY(&detach_ctx->head)) {
			free(detach_ctx);
		}
		return rc;
	}

	/* Append a context for this detachment to the context header. */
	TAILQ_INSERT_TAIL(&detach_ctx->head, ctx, link);

	*_detach_ctx = detach_ctx;

	return 0;
}

int
spdk_nvme_detach_poll_async(struct spdk_nvme_detach_ctx *detach_ctx)
{
	struct nvme_ctrlr_detach_ctx *ctx, *tmp_ctx;
	int rc;

	if (detach_ctx == NULL) {
		return -EINVAL;
	}

	detach_ctx->polling_started = true;

	TAILQ_FOREACH_SAFE(ctx, &detach_ctx->head, link, tmp_ctx) {
		TAILQ_REMOVE(&detach_ctx->head, ctx, link);

		rc = nvme_ctrlr_detach_poll_async(ctx);
		if (rc == -EAGAIN) {
			/* Detachment is still in progress; put ctx back on the list.
			 * For any other return value, ctx was already freed by
			 * nvme_ctrlr_detach_poll_async().
			 */
			TAILQ_INSERT_HEAD(&detach_ctx->head, ctx, link);
		}
	}

	if (!TAILQ_EMPTY(&detach_ctx->head)) {
		return -EAGAIN;
	}

	free(detach_ctx);
	return 0;
}
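
/*
 * Illustrative usage sketch (not part of the original file): detaching two
 * controllers asynchronously with a shared context, then polling until both
 * detachments finish. ctrlr1 and ctrlr2 are assumed to be attached already.
 *
 *	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *	int rc;
 *
 *	rc = spdk_nvme_detach_async(ctrlr1, &detach_ctx);
 *	if (rc == 0) {
 *		rc = spdk_nvme_detach_async(ctrlr2, &detach_ctx);
 *	}
 *	// detach_ctx may stay NULL if other processes still hold references.
 *	if (rc == 0 && detach_ctx != NULL) {
 *		do {
 *			rc = spdk_nvme_detach_poll_async(detach_ctx);
 *		} while (rc == -EAGAIN);
 *	}
 */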

void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status	*status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request,
		 * so free the allocated memory.
		 */
		free(status);
		return;
	}

	/*
	 * Copy status into the argument passed by the caller, so that
	 *  the caller can check the status to determine if the
	 *  request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

static void
dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or the timeout expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock_timeout(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex,
	uint64_t timeout_in_usecs)
{
	uint64_t timeout_tsc = 0;
	int rc = 0;

	if (timeout_in_usecs) {
		timeout_tsc = spdk_get_ticks() + timeout_in_usecs * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
	}

	while (status->done == false) {
		if (robust_mutex) {
			nvme_robust_mutex_lock(robust_mutex);
		}

		if (qpair->poll_group) {
			rc = (int)spdk_nvme_poll_group_process_completions(qpair->poll_group->group, 0,
					dummy_disconnected_qpair_cb);
		} else {
			rc = spdk_nvme_qpair_process_completions(qpair, 0);
		}

		if (robust_mutex) {
			nvme_robust_mutex_unlock(robust_mutex);
		}

		if (rc < 0) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
			break;
		}
		if (timeout_tsc && spdk_get_ticks() > timeout_tsc) {
			rc = -1;
			break;
		}
		if (qpair->ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
			union spdk_nvme_csts_register csts = spdk_nvme_ctrlr_get_regs_csts(qpair->ctrlr);
			if (csts.raw == SPDK_NVME_INVALID_REGISTER_VALUE) {
				status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
				status->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
				break;
			}
		}
	}

	if (status->done == false) {
		status->timed_out = true;
	}

	if (rc < 0) {
		return -ECANCELED;
	}

	return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, robust_mutex, 0);
}

int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, 0);
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param timeout_in_usecs optional timeout
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or the timeout expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_usecs)
{
	return nvme_wait_for_completion_robust_lock_timeout(qpair, status, NULL, timeout_in_usecs);
}
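
/*
 * Illustrative sketch of the polling pattern these helpers expect (not part
 * of the original file; the submission step is pseudocode). A command is
 * submitted with nvme_completion_poll_cb as its callback and a zeroed
 * nvme_completion_poll_status as the argument; the status must be
 * heap-allocated so nvme_completion_poll_cb can free it if the wait
 * times out first.
 *
 *	struct nvme_completion_poll_status *status;
 *	int rc;
 *
 *	status = calloc(1, sizeof(*status));
 *	if (status == NULL) {
 *		return -ENOMEM;
 *	}
 *	rc = ... submit an admin command with nvme_completion_poll_cb
 *	        as cb_fn and status as cb_arg ...;
 *	if (rc == 0) {
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *	}
 *	if (!status->timed_out) {
 *		free(status);	// otherwise the callback frees it later
 *	}
 */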

static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer and free the contig buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}

		spdk_free(req->payload.contig_or_cb_arg);
	}

	/* Call the user's original callback now that the buffer has been copied */
	req->user_cb_fn(req->user_cb_arg, cpl);
}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}

/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + active_proc->timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}
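
/*
 * Illustrative usage sketch (not part of the original file): applications arm
 * this timeout path through the public API. This assumes the single-timeout
 * variant of spdk_nvme_ctrlr_register_timeout_callback() from this SPDK
 * generation, which takes a timeout in microseconds:
 *
 *	static void
 *	timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
 *		   struct spdk_nvme_qpair *qpair, uint16_t cid)
 *	{
 *		// qpair is NULL for admin commands; see the note above.
 *		printf("command %u timed out\n", cid);
 *	}
 *
 *	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 5 * SPDK_SEC_TO_USEC,
 *						  timeout_cb, NULL);
 */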

int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}

int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread.  Once that object is established and its
	 * mutex is initialized, we can unlock this mutex and use that
	 * one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 *  initialization.
	 * The secondary process will look up the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone has already been reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to be initialized. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = nvme_uevent_connect();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(nvme, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
		if (ctrlr) {
			/* This ctrlr already exists. */

			if (ctrlr->is_destructed) {
				/* This ctrlr is being destructed asynchronously. */
				SPDK_ERRLOG("NVMe controller for SSD: %s is being destructed\n",
					    trid->traddr);
				return -EBUSY;
			}

			/* Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately. */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE &&
		    ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) {
			/* If the user specifically set an IO queue size different than the
			 * default, use that value.  Otherwise overwrite with the quirked value.
			 * This allows this quirk to be overridden when necessary.
			 * However, cap.mqes still needs to be respected.
			 */
			ctrlr->opts.io_queue_size = spdk_min(DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK, ctrlr->cap.bits.mqes + 1u);
		}

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_destruct(ctrlr);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 *  Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}
}

static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}

	return rc;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	return NULL;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock;
 * it takes the lock itself.
 */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u not available\n", probe_ctx->trid.trtype);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 *  that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more controller attachments fail;
	 * the return value of nvme_init_controllers() signals any errors.
	 */
	return nvme_init_controllers(probe_ctx);
}
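
/*
 * Illustrative usage sketch (not part of the original file): enumerating all
 * local PCIe controllers. Passing trid == NULL defaults to the PCIe
 * transport, as implemented above.
 *
 *	static bool
 *	probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;	// attach to every controller found
 *	}
 *
 *	static void
 *	attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		  struct spdk_nvme_ctrlr *ctrlr,
 *		  const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("attached %s\n", trid->traddr);
 *	}
 *
 *	...
 *	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
 *		... handle probe failure ...
 *	}
 */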

static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

	if (FIELD_OK(num_io_queues)) {
		opts->num_io_queues = opts_user->num_io_queues;
	}

	if (FIELD_OK(use_cmb_sqs)) {
		opts->use_cmb_sqs = opts_user->use_cmb_sqs;
	}

	if (FIELD_OK(no_shn_notification)) {
		opts->no_shn_notification = opts_user->no_shn_notification;
	}

	if (FIELD_OK(arb_mechanism)) {
		opts->arb_mechanism = opts_user->arb_mechanism;
	}

	if (FIELD_OK(arbitration_burst)) {
		opts->arbitration_burst = opts_user->arbitration_burst;
	}

	if (FIELD_OK(low_priority_weight)) {
		opts->low_priority_weight = opts_user->low_priority_weight;
	}

	if (FIELD_OK(medium_priority_weight)) {
		opts->medium_priority_weight = opts_user->medium_priority_weight;
	}

	if (FIELD_OK(high_priority_weight)) {
		opts->high_priority_weight = opts_user->high_priority_weight;
	}

	if (FIELD_OK(keep_alive_timeout_ms)) {
		opts->keep_alive_timeout_ms = opts_user->keep_alive_timeout_ms;
	}

	if (FIELD_OK(transport_retry_count)) {
		opts->transport_retry_count = opts_user->transport_retry_count;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = opts_user->io_queue_size;
	}

	if (FIELD_OK(hostnqn)) {
		memcpy(opts->hostnqn, opts_user->hostnqn, sizeof(opts_user->hostnqn));
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = opts_user->io_queue_requests;
	}

	if (FIELD_OK(src_addr)) {
		memcpy(opts->src_addr, opts_user->src_addr, sizeof(opts_user->src_addr));
	}

	if (FIELD_OK(src_svcid)) {
		memcpy(opts->src_svcid, opts_user->src_svcid, sizeof(opts_user->src_svcid));
	}

	if (FIELD_OK(host_id)) {
		memcpy(opts->host_id, opts_user->host_id, sizeof(opts_user->host_id));
	}

	if (FIELD_OK(extended_host_id)) {
		memcpy(opts->extended_host_id, opts_user->extended_host_id,
		       sizeof(opts_user->extended_host_id));
	}

	if (FIELD_OK(command_set)) {
		opts->command_set = opts_user->command_set;
	}

	if (FIELD_OK(admin_timeout_ms)) {
		opts->admin_timeout_ms = opts_user->admin_timeout_ms;
	}

	if (FIELD_OK(header_digest)) {
		opts->header_digest = opts_user->header_digest;
	}

	if (FIELD_OK(data_digest)) {
		opts->data_digest = opts_user->data_digest;
	}

	if (FIELD_OK(disable_error_logging)) {
		opts->disable_error_logging = opts_user->disable_error_logging;
	}

	if (FIELD_OK(transport_ack_timeout)) {
		opts->transport_ack_timeout = opts_user->transport_ack_timeout;
	}

	if (FIELD_OK(admin_queue_size)) {
		opts->admin_queue_size = opts_user->admin_queue_size;
	}
#undef FIELD_OK
}
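
/*
 * Illustrative sketch of the caller-side convention this function supports
 * (not part of the original file): opts_size lets binaries built against an
 * older definition of spdk_nvme_ctrlr_opts pass a smaller structure, and
 * only the fields that fit within opts_size are copied.
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */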

struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid);

	return ctrlr;
}
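
/*
 * Illustrative usage sketch (not part of the original file): connecting to a
 * single NVMe-oF target directly instead of probing. The address values are
 * made-up examples.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid,
 *		"trtype:TCP adrfam:IPv4 traddr:192.168.1.10 trsvcid:4420 "
 *		"subnqn:nqn.2016-06.io.spdk:cnode1");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *	if (ctrlr == NULL) {
 *		... connection failed ...
 *	}
 */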

void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring = "";

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
		trstring = SPDK_NVME_TRANSPORT_NAME_CUSTOM;
		break;
	default:
		SPDK_ERRLOG("no available transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}

int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int len, i, rc;

	if (trstring == NULL) {
		return -EINVAL;
	}

	len = strnlen(trstring, SPDK_NVMF_TRSTRING_MAX_LEN);
	if (len == SPDK_NVMF_TRSTRING_MAX_LEN) {
		return -EINVAL;
	}

	rc = snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
	if (rc < 0) {
		return rc;
	}

	/* Canonicalize the official trstring as the uppercase version of the input. */
	for (i = 0; i < len; i++) {
		trid->trstring[i] = toupper(trid->trstring[i]);
	}
	return 0;
}

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip key: */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}

int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case.  The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. nvme/perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace.  For this
			 * scenario, just silently ignore this key
			 * rather than letting it default to logging
			 * it as an invalid key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
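
/*
 * Illustrative sketch of the key:value format accepted above (not part of
 * the original file). Keys may be separated from values by ':' or '=', and
 * pairs are whitespace-delimited:
 *
 *	struct spdk_nvme_transport_id trid = {};
 *
 *	if (spdk_nvme_transport_id_parse(&trid,
 *			"trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 "
 *			"trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1") != 0) {
 *		... invalid transport ID string ...
 *	}
 */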

int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return val_len;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);

		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}

int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}
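
/*
 * Illustrative usage sketch (not part of the original file): parsing the
 * "prchk" key that some SPDK applications append to transport ID strings.
 *
 *	uint32_t prchk_flags = 0;
 *
 *	if (spdk_nvme_prchk_flags_parse(&prchk_flags, "prchk:reftag|guard") == 0) {
 *		// prchk_flags now has SPDK_NVME_IO_FLAGS_PRCHK_REFTAG and
 *		// SPDK_NVME_IO_FLAGS_PRCHK_GUARD set.
 *	}
 */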

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb);
	rc = nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return 0;
	}

	return -EAGAIN;
}
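
/*
 * Illustrative usage sketch (not part of the original file): the
 * non-blocking counterpart of spdk_nvme_probe(), typically driven from an
 * application poller rather than the busy loop shown here. probe_cb and
 * attach_cb are the same callbacks sketched after spdk_nvme_probe() above.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *
 *	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
 *	probe_ctx = spdk_nvme_probe_async(&trid, NULL, probe_cb, attach_cb, NULL);
 *	if (probe_ctx == NULL) {
 *		... probe setup failed ...
 *	}
 *	// probe_ctx is freed by spdk_nvme_probe_poll_async() on completion.
 *	while (spdk_nvme_probe_poll_async(probe_ctx) == -EAGAIN) {
 *		// controller initialization still making forward progress
 *	}
 */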

struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

SPDK_LOG_REGISTER_COMPONENT(nvme)