xref: /spdk/lib/nvme/nvme.c (revision fa2d95b3fe66e7f5c543eaef89fa00d4eaa0e6e7)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/nvmf_spec.h"
#include "nvme_internal.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver	*g_spdk_nvme_driver;
pid_t			g_spdk_nvme_pid;

int32_t			spdk_nvme_retry_count;

/* Coarse overall timeout of 180 seconds, expressed in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);

/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}

void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}

int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	nvme_ctrlr_proc_put_ref(ctrlr);

	if (nvme_ctrlr_get_ref_count(ctrlr) == 0) {
		if (nvme_ctrlr_shared(ctrlr)) {
			TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
		} else {
			TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
		}
		nvme_ctrlr_destruct(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
	return 0;
}

void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status	*status = arg;

	/*
	 * Copy the completion into the caller-provided status structure, so
	 * that the caller can check it to determine whether the request
	 * passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}

/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error, negative errno on failure
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
spdk_nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	memset(&status->cpl, 0, sizeof(status->cpl));
	status->done = false;

	while (status->done == false) {
		if (robust_mutex) {
			nvme_robust_mutex_lock(robust_mutex);
		}

		spdk_nvme_qpair_process_completions(qpair, 0);

		if (robust_mutex) {
			nvme_robust_mutex_unlock(robust_mutex);
		}
	}

	return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0;
}

int
spdk_nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			      struct nvme_completion_poll_status *status)
{
	return spdk_nvme_wait_for_completion_robust_lock(qpair, status, NULL);
}
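
/*
 * Illustrative usage sketch (not part of the driver) of the polling contract
 * documented above: submit a command with nvme_completion_poll_cb and a
 * status struct as the callback argument, then wait on the same queue.  The
 * opcode and buffer are placeholders; a real Get Features command would also
 * fill in cdw10.
 *
 *	struct nvme_completion_poll_status status;
 *	struct spdk_nvme_cmd cmd = {};
 *	int rc;
 *
 *	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
 *	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *					   nvme_completion_poll_cb, &status);
 *	if (rc == 0) {
 *		rc = spdk_nvme_wait_for_completion(ctrlr->adminq, &status);
 *	}
 */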

int
spdk_nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				      struct nvme_completion_poll_status *status,
				      uint64_t timeout_in_secs)
{
	uint64_t timeout_tsc = 0;

	memset(&status->cpl, 0, sizeof(status->cpl));
	status->done = false;
	if (timeout_in_secs) {
		timeout_tsc = spdk_get_ticks() + timeout_in_secs * spdk_get_ticks_hz();
	}

	while (status->done == false) {
		spdk_nvme_qpair_process_completions(qpair, 0);
		if (timeout_tsc && spdk_get_ticks() > timeout_tsc) {
			break;
		}
	}

	if (status->done == false) {
		return -EIO;
	}

	return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0;
}

static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer and free the contig buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}

		spdk_free(req->payload.contig_or_cb_arg);
	}

	/* Call the user's original callback now that the buffer has been copied */
	req->user_cb_fn(req->user_cb_arg, cpl);
}

/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}
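
/*
 * Illustrative sketch of how admin-path callers typically use the helper
 * above.  "user_buf" need not be DMA-capable; the helper stages it through a
 * DMA buffer, and nvme_user_copy_cmd_complete() copies the data back and
 * frees the staging buffer on completion.  Names and the opcode are
 * placeholders.
 *
 *	req = nvme_allocate_request_user_copy(ctrlr->adminq, user_buf, len,
 *					      cb_fn, cb_arg, false);
 *	if (req == NULL) {
 *		return -ENOMEM;
 *	}
 *	req->cmd.opc = SPDK_NVME_OPC_GET_LOG_PAGE;
 *	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
 */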

/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for the command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + active_proc->timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user, so when we're
	 * timing out admin commands pass a NULL qpair to the callback instead.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}
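
/*
 * Illustrative calling pattern for the check above: walk outstanding
 * requests in submission order and stop at the first one that has not yet
 * timed out (return value 1).  The tracker list is a placeholder for a
 * transport's per-qpair bookkeeping.
 *
 *	uint64_t now_tick = spdk_get_ticks();
 *
 *	TAILQ_FOREACH(tr, &pqpair->outstanding_tr, tq_list) {
 *		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc,
 *					       now_tick)) {
 *			break;
 *		}
 *	}
 */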

int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}
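
/*
 * Note on robust mutex semantics (non-FreeBSD): if a process dies while
 * holding the lock, the next pthread_mutex_lock() returns EOWNERDEAD and
 * the survivor is expected to repair state and mark the mutex consistent.
 * A minimal sketch of that pattern, which nvme_robust_mutex_lock() is
 * assumed to wrap:
 *
 *	rc = pthread_mutex_lock(mtx);
 *	if (rc == EOWNERDEAD) {
 *		rc = pthread_mutex_consistent(mtx);
 *	}
 */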

int
nvme_driver_init(void)
{
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Each process caches its own PID. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 *  initialization.
	 * The secondary process will look up the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The uniquely named memzone is already reserved. */
		if (g_spdk_nvme_driver != NULL) {
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");

			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The uniquely named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the NVMe driver to finish initializing. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");

				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process has not started yet\n");

			return -1;
		}

		return 0;
	}

	/*
	 * At this point, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization.
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		return ret;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}
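
/*
 * Sketch of the handshake implemented above, assuming DPDK-style primary
 * and secondary processes:
 *
 *	primary:    spdk_memzone_reserve("spdk_nvme_driver")
 *	            -> init robust lock, shared list, default host ID
 *	            -> initialized = true (set once probing completes)
 *	secondary:  spdk_memzone_lookup("spdk_nvme_driver")
 *	            -> poll initialized, 1 ms at a time, for up to
 *	               g_nvme_driver_timeout_ms
 */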

int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}

		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}

static int
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int	rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_ctrlr_destruct(ctrlr);
		return rc;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return 0;
	}

	/*
	 * Controller has been initialized.
	 *  Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call spdk_nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}

	return 0;
}

static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}
}

/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
spdk_nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(trid);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}

/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
spdk_nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	return NULL;
}

/* This function must not be called while holding g_spdk_nvme_driver->lock; it takes the lock itself */
static int
spdk_nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
			 bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr;

	if (!spdk_nvme_transport_available(probe_ctx->trid.trtype)) {
		SPDK_ERRLOG("NVMe trtype %u not available\n", probe_ctx->trid.trtype);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (spdk_nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 *  that may take the driver lock, like spdk_nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}

static void
spdk_nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
			 const struct spdk_nvme_transport_id *trid,
			 void *cb_ctx,
			 spdk_nvme_probe_cb probe_cb,
			 spdk_nvme_attach_cb attach_cb,
			 spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}

int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		trid_pcie.trtype = SPDK_NVME_TRANSPORT_PCIE;
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Failed to create probe context\n");
		return -1;
	}

	/*
	 * Keep going even if one or more controllers fail to attach, but
	 *  preserve the return code so errors are signaled to the caller.
	 */
	return nvme_init_controllers(probe_ctx);
}
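
/*
 * Illustrative usage sketch, similar in spirit to the SPDK hello_world
 * example: enumerate all local PCIe controllers.  my_probe_cb() returns
 * true to claim every controller found; my_attach_cb() would record the
 * attached controller for later I/O.  Names are placeholders.
 *
 *	static bool
 *	my_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		    struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;
 *	}
 *
 *	static void
 *	my_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		     struct spdk_nvme_ctrlr *ctrlr,
 *		     const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *	}
 *
 *	rc = spdk_nvme_probe(NULL, NULL, my_probe_cb, my_attach_cb, NULL);
 */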

static bool
spdk_nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
			   struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}

struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	if (opts && (opts_size != sizeof(*opts))) {
		SPDK_ERRLOG("Invalid opts size\n");
		return NULL;
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Failed to create probe context\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = spdk_nvme_get_ctrlr_by_trid(trid);

	return ctrlr;
}
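
/*
 * Illustrative sketch: connect directly to a single controller by transport
 * ID.  The fabrics address below is a placeholder.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid,
 *		"trtype:TCP adrfam:IPv4 traddr:192.168.1.10 trsvcid:4420 "
 *		"subnqn:nqn.2016-06.io.spdk:cnode1");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 */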

int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	default:
		return NULL;
	}
}

int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}

const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}

static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip past the key and its separator */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}
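
/*
 * Illustrative walk-through of the tokenizer above.  Given
 *
 *	const char *str = "trtype:PCIe traddr=0000:04:00.0";
 *	char key[32], val[1024];
 *
 *	parse_next_key(&str, key, val, sizeof(key), sizeof(val));
 *
 * the first call yields key "trtype" and val "PCIe" and advances str past
 * the value; a second call accepts '=' as the separator just as well,
 * yielding key "traddr" and val "0000:04:00.0".
 */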

int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case.  The namespace ID parameter may optionally
			 * be passed in the transport ID string by an SPDK
			 * application (e.g. nvme/perf), which parses it separately
			 * to target a specific namespace.  Silently ignore the key
			 * here rather than reporting it as unknown.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
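
/*
 * Illustrative sketch: parsing a fabrics transport ID string (all values
 * are placeholders).
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	int rc;
 *
 *	rc = spdk_nvme_transport_id_parse(&trid,
 *		"trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420 "
 *		"subnqn:nqn.2016-06.io.spdk:cnode1");
 *
 * On success, trid.trtype is SPDK_NVME_TRANSPORT_RDMA, trid.traddr holds
 * "192.168.100.8", and so on.
 */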

int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, key_size, val_size);
		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return -EINVAL;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
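
/*
 * Illustrative sketch: the host ID keys may share one string with the
 * transport ID keys; each parser extracts only the keys it owns and
 * ignores the rest.  Values are placeholders.
 *
 *	struct spdk_nvme_host_id hostid = {};
 *
 *	spdk_nvme_host_id_parse(&hostid,
 *		"trtype:RDMA traddr:192.168.100.8 "
 *		"hostaddr:192.168.100.9 hostsvcid:4421");
 */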

static int
cmp_int(int a, int b)
{
	return a - b;
}

int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	cmp = cmp_int(trid1->trtype, trid2->trtype);
	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1;
		struct spdk_pci_addr pci_addr2;

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}
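
/*
 * Illustrative note: because PCIe addresses are normalized before being
 * compared, differently formatted strings for the same device compare
 * equal (assuming spdk_pci_addr_parse() accepts a domain-less BDF).
 *
 *	struct spdk_nvme_transport_id trid1 = {}, trid2 = {};
 *
 *	trid1.trtype = trid2.trtype = SPDK_NVME_TRANSPORT_PCIE;
 *	snprintf(trid1.traddr, sizeof(trid1.traddr), "0000:04:00.0");
 *	snprintf(trid2.traddr, sizeof(trid2.traddr), "04:00.0");
 *	assert(spdk_nvme_transport_id_compare(&trid1, &trid2) == 0);
 */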

int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));
		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}
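
/*
 * Illustrative sketch: "prchk:reftag|guard" sets both protection-check
 * flags; matching is by substring, so each keyword only has to appear
 * somewhere in the value.
 *
 *	uint32_t flags = 0;
 *
 *	spdk_nvme_prchk_flags_parse(&flags, "prchk:reftag|guard");
 *
 * leaves both SPDK_NVME_IO_FLAGS_PRCHK_REFTAG and
 * SPDK_NVME_IO_FLAGS_PRCHK_GUARD set in flags.
 */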

const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}

struct spdk_nvme_probe_ctx *
spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
		      void *cb_ctx,
		      spdk_nvme_probe_cb probe_cb,
		      spdk_nvme_attach_cb attach_cb,
		      spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	spdk_nvme_probe_ctx_init(probe_ctx, trid, cb_ctx, probe_cb, attach_cb, remove_cb);
	rc = spdk_nvme_probe_internal(probe_ctx, false);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
		if (rc != 0) {
			rc = -EIO;
			break;
		}
	}

	if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return rc;
	}

	return -EAGAIN;
}
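
/*
 * Illustrative sketch of the asynchronous probe flow; spdk_nvme_probe()
 * wraps exactly this loop via nvme_init_controllers().  Note that the
 * context is freed by spdk_nvme_probe_poll_async() itself once it returns
 * anything other than -EAGAIN.
 *
 *	probe_ctx = spdk_nvme_probe_async(trid, NULL, my_probe_cb,
 *					  my_attach_cb, NULL);
 *	if (probe_ctx == NULL) {
 *		return -1;
 *	}
 *	do {
 *		rc = spdk_nvme_probe_poll_async(probe_ctx);
 *	} while (rc == -EAGAIN);
 */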

struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = spdk_nvme_connect_probe_cb;
	}

	spdk_nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
	rc = spdk_nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}

SPDK_LOG_REGISTER_COMPONENT("nvme", SPDK_LOG_NVME)