xref: /spdk/module/event/subsystems/nvmf/nvmf_tgt.c (revision cc6920a4763d4b9a43aa40583c8397d8f14fa100)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "event_nvmf.h"
36 
37 #include "spdk/bdev.h"
38 #include "spdk/thread.h"
39 #include "spdk/log.h"
40 #include "spdk/nvme.h"
41 #include "spdk/nvmf_cmd.h"
42 #include "spdk_internal/usdt.h"
43 
/*
 * Lifecycle states for the nvmf target event subsystem.
 *
 * The numeric ordering matters: nvmf_shutdown_cb() tests
 * "g_tgt_state < NVMF_TGT_RUNNING" to detect in-progress initialization,
 * so all init states must sort before NVMF_TGT_RUNNING.
 */
enum nvmf_tgt_state {
	NVMF_TGT_INIT_NONE = 0,
	NVMF_TGT_INIT_CREATE_TARGET,		/* create the spdk_nvmf_tgt object */
	NVMF_TGT_INIT_CREATE_POLL_GROUPS,	/* spawn a poll group thread per selected core */
	NVMF_TGT_INIT_START_SUBSYSTEMS,		/* start every configured subsystem */
	NVMF_TGT_RUNNING,
	NVMF_TGT_FINI_STOP_SUBSYSTEMS,		/* stop subsystems during shutdown */
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,	/* destroy poll groups and exit their threads */
	NVMF_TGT_FINI_FREE_RESOURCES,		/* destroy the target object itself */
	NVMF_TGT_STOPPED,
	NVMF_TGT_ERROR,
};
56 
/*
 * Bookkeeping for one poll group: the group object plus the spdk thread
 * it was created on, linked into g_poll_groups so teardown can find it.
 */
struct nvmf_tgt_poll_group {
	struct spdk_nvmf_poll_group		*group;		/* NULL if creation failed */
	struct spdk_thread			*thread;	/* thread the group lives on */
	TAILQ_ENTRY(nvmf_tgt_poll_group)	link;
};
62 
/* Target configuration with built-in defaults; non-static so other parts of
 * this module (e.g. RPC handlers) can adjust it before initialization.
 */
struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf = {
	.acceptor_poll_rate = ACCEPT_TIMEOUT_US,
	.admin_passthru.identify_ctrlr = false
};
67 
/* Optional cpumask restricting which cores get a poll group; NULL means all. */
struct spdk_cpuset *g_poll_groups_mask = NULL;
/* The single nvmf target owned by this subsystem. */
struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
/* 0 leaves the library's default maximum subsystem count in effect. */
uint32_t g_spdk_nvmf_tgt_max_subsystems = 0;
/* NVMe Command Retry Delay Times, passed straight into the target opts. */
uint16_t g_spdk_nvmf_tgt_crdt[3] = {0, 0, 0};

/* Current position in the nvmf_tgt_state machine. */
static enum nvmf_tgt_state g_tgt_state;

/* Threads that drive init and fini; per-core completions are funneled back
 * to these via spdk_thread_send_msg().
 */
static struct spdk_thread *g_tgt_init_thread = NULL;
static struct spdk_thread *g_tgt_fini_thread = NULL;

/* All created poll groups, plus a count used to detect the last completion. */
static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
static size_t g_num_poll_groups = 0;

static void nvmf_tgt_advance_state(void);
82 
83 static void
84 nvmf_shutdown_cb(void *arg1)
85 {
86 	/* Still in initialization state, defer shutdown operation */
87 	if (g_tgt_state < NVMF_TGT_RUNNING) {
88 		spdk_thread_send_msg(spdk_get_thread(), nvmf_shutdown_cb, NULL);
89 		return;
90 	} else if (g_tgt_state != NVMF_TGT_RUNNING && g_tgt_state != NVMF_TGT_ERROR) {
91 		/* Already in Shutdown status, ignore the signal */
92 		return;
93 	}
94 
95 	if (g_tgt_state == NVMF_TGT_ERROR) {
96 		/* Parse configuration error */
97 		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
98 	} else {
99 		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
100 	}
101 	nvmf_tgt_advance_state();
102 }
103 
/* Event framework fini hook: begin the shutdown sequence. */
static void
nvmf_subsystem_fini(void)
{
	nvmf_shutdown_cb(NULL);
}
109 
110 static void
111 _nvmf_tgt_destroy_poll_group_done(void *ctx)
112 {
113 	assert(g_num_poll_groups > 0);
114 
115 	if (--g_num_poll_groups == 0) {
116 		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
117 		nvmf_tgt_advance_state();
118 	}
119 }
120 
121 static void
122 nvmf_tgt_destroy_poll_group_done(void *cb_arg, int status)
123 {
124 	struct nvmf_tgt_poll_group *pg = cb_arg;
125 
126 	free(pg);
127 
128 	spdk_thread_send_msg(g_tgt_fini_thread, _nvmf_tgt_destroy_poll_group_done, NULL);
129 
130 	spdk_thread_exit(spdk_get_thread());
131 }
132 
133 static void
134 nvmf_tgt_destroy_poll_group(void *ctx)
135 {
136 	struct nvmf_tgt_poll_group *pg = ctx;
137 
138 	spdk_nvmf_poll_group_destroy(pg->group, nvmf_tgt_destroy_poll_group_done, pg);
139 }
140 
141 static void
142 nvmf_tgt_destroy_poll_groups(void)
143 {
144 	struct nvmf_tgt_poll_group *pg, *tpg;
145 
146 	g_tgt_fini_thread = spdk_get_thread();
147 	assert(g_tgt_fini_thread != NULL);
148 
149 	TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
150 		TAILQ_REMOVE(&g_poll_groups, pg, link);
151 		spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg);
152 	}
153 }
154 
155 static uint32_t
156 nvmf_get_cpuset_count(void)
157 {
158 	if (g_poll_groups_mask) {
159 		return spdk_cpuset_count(g_poll_groups_mask);
160 	} else {
161 		return spdk_env_get_core_count();
162 	}
163 }
164 
165 static void
166 nvmf_tgt_create_poll_group_done(void *ctx)
167 {
168 	struct nvmf_tgt_poll_group *pg = ctx;
169 
170 	assert(pg);
171 
172 	if (!pg->group) {
173 		SPDK_ERRLOG("Failed to create nvmf poll group\n");
174 		/* Change the state to error but wait for completions from all other threads */
175 		g_tgt_state = NVMF_TGT_ERROR;
176 	}
177 
178 	TAILQ_INSERT_TAIL(&g_poll_groups, pg, link);
179 
180 	assert(g_num_poll_groups < nvmf_get_cpuset_count());
181 
182 	if (++g_num_poll_groups == nvmf_get_cpuset_count()) {
183 		if (g_tgt_state != NVMF_TGT_ERROR) {
184 			g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
185 		}
186 		nvmf_tgt_advance_state();
187 	}
188 }
189 
190 static void
191 nvmf_tgt_create_poll_group(void *ctx)
192 {
193 	struct nvmf_tgt_poll_group *pg;
194 
195 	pg = calloc(1, sizeof(*pg));
196 	if (!pg) {
197 		SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
198 		g_tgt_state = NVMF_TGT_ERROR;
199 		nvmf_tgt_advance_state();
200 		return;
201 	}
202 
203 	pg->thread = spdk_get_thread();
204 	pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);
205 
206 	spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_done, pg);
207 }
208 
209 static void
210 nvmf_tgt_create_poll_groups(void)
211 {
212 	uint32_t i;
213 	char thread_name[32];
214 	struct spdk_thread *thread;
215 
216 	g_tgt_init_thread = spdk_get_thread();
217 	assert(g_tgt_init_thread != NULL);
218 
219 	SPDK_ENV_FOREACH_CORE(i) {
220 		if (g_poll_groups_mask && !spdk_cpuset_get_cpu(g_poll_groups_mask, i)) {
221 			continue;
222 		}
223 		snprintf(thread_name, sizeof(thread_name), "nvmf_tgt_poll_group_%u", i);
224 
225 		thread = spdk_thread_create(thread_name, g_poll_groups_mask);
226 		assert(thread != NULL);
227 
228 		spdk_thread_send_msg(thread, nvmf_tgt_create_poll_group, NULL);
229 	}
230 }
231 
232 static void
233 nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
234 			   void *cb_arg, int status)
235 {
236 	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
237 	int rc;
238 
239 	if (subsystem) {
240 		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
241 		if (rc) {
242 			g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
243 			SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
244 			nvmf_tgt_advance_state();
245 		}
246 		return;
247 	}
248 
249 	g_tgt_state = NVMF_TGT_RUNNING;
250 	nvmf_tgt_advance_state();
251 }
252 
253 static void
254 nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
255 			   void *cb_arg, int status)
256 {
257 	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
258 	int rc;
259 
260 	if (subsystem) {
261 		rc = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
262 		if (rc) {
263 			SPDK_ERRLOG("Unable to stop NVMe-oF subsystem. Trying others.\n");
264 			nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
265 		}
266 		return;
267 	}
268 
269 	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
270 	nvmf_tgt_advance_state();
271 }
272 
/* Completion callback for spdk_nvmf_tgt_destroy(): the target object is
 * gone, so finish the fini sequence.
 */
static void
nvmf_tgt_destroy_done(void *ctx, int status)
{
	g_tgt_state = NVMF_TGT_STOPPED;

	nvmf_tgt_advance_state();
}
280 
281 static int
282 nvmf_add_discovery_subsystem(void)
283 {
284 	struct spdk_nvmf_subsystem *subsystem;
285 
286 	subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN,
287 					       SPDK_NVMF_SUBTYPE_DISCOVERY, 0);
288 	if (subsystem == NULL) {
289 		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
290 		return -1;
291 	}
292 
293 	spdk_nvmf_subsystem_set_allow_any_host(subsystem, true);
294 
295 	return 0;
296 }
297 
298 static int
299 nvmf_tgt_create_target(void)
300 {
301 	struct spdk_nvmf_target_opts opts = {
302 		.name = "nvmf_tgt"
303 	};
304 
305 	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
306 	opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf.acceptor_poll_rate;
307 	opts.crdt[0] = g_spdk_nvmf_tgt_crdt[0];
308 	opts.crdt[1] = g_spdk_nvmf_tgt_crdt[1];
309 	opts.crdt[2] = g_spdk_nvmf_tgt_crdt[2];
310 	opts.discovery_filter = g_spdk_nvmf_tgt_conf.discovery_filter;
311 	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
312 	if (!g_spdk_nvmf_tgt) {
313 		SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n");
314 		return -1;
315 	}
316 
317 	if (nvmf_add_discovery_subsystem() != 0) {
318 		SPDK_ERRLOG("nvmf_add_discovery_subsystem failed\n");
319 		return -1;
320 	}
321 
322 	return 0;
323 }
324 
325 static void
326 fixup_identify_ctrlr(struct spdk_nvmf_request *req)
327 {
328 	uint32_t length;
329 	int rc;
330 	struct spdk_nvme_ctrlr_data *nvme_cdata;
331 	struct spdk_nvme_ctrlr_data nvmf_cdata = {};
332 	struct spdk_nvmf_ctrlr *ctrlr = spdk_nvmf_request_get_ctrlr(req);
333 	struct spdk_nvme_cpl *rsp = spdk_nvmf_request_get_response(req);
334 
335 	/* This is the identify data from the NVMe drive */
336 	spdk_nvmf_request_get_data(req, (void **)&nvme_cdata, &length);
337 
338 	/* Get the NVMF identify data */
339 	rc = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, &nvmf_cdata);
340 	if (rc != SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
341 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
342 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
343 		return;
344 	}
345 
346 	/* Fixup NVMF identify data with NVMe identify data */
347 
348 	/* Serial Number (SN) */
349 	memcpy(&nvmf_cdata.sn[0], &nvme_cdata->sn[0], sizeof(nvmf_cdata.sn));
350 	/* Model Number (MN) */
351 	memcpy(&nvmf_cdata.mn[0], &nvme_cdata->mn[0], sizeof(nvmf_cdata.mn));
352 	/* Firmware Revision (FR) */
353 	memcpy(&nvmf_cdata.fr[0], &nvme_cdata->fr[0], sizeof(nvmf_cdata.fr));
354 	/* IEEE OUI Identifier (IEEE) */
355 	memcpy(&nvmf_cdata.ieee[0], &nvme_cdata->ieee[0], sizeof(nvmf_cdata.ieee));
356 	/* FRU Globally Unique Identifier (FGUID) */
357 
358 	/* Copy the fixed up data back to the response */
359 	memcpy(nvme_cdata, &nvmf_cdata, length);
360 }
361 
/* Custom admin handler for IDENTIFY with CNS = controller data.  Forwards
 * the command to the backing bdev when the subsystem has exactly one
 * namespace whose bdev supports NVMe admin passthru; returns -1 in every
 * other case so the generic handler processes the command instead.  On a
 * successful passthru, fixup_identify_ctrlr() merges the response data.
 */
static int
nvmf_custom_identify_hdlr(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_subsystem *subsys;
	int rc;

	if (cmd->cdw10_bits.identify.cns != SPDK_NVME_IDENTIFY_CTRLR) {
		return -1; /* continue */
	}

	subsys = spdk_nvmf_request_get_subsystem(req);
	if (subsys == NULL) {
		return -1;
	}

	/* Only process this request if it has exactly one namespace */
	if (spdk_nvmf_subsystem_get_max_nsid(subsys) != 1) {
		return -1;
	}

	/* Forward to first namespace if it supports NVME admin commands */
	rc = spdk_nvmf_request_get_bdev(1, req, &bdev, &desc, &ch);
	if (rc) {
		/* No bdev found for this namespace. Continue. */
		return -1;
	}

	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
		return -1;
	}

	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, fixup_identify_ctrlr);
}
399 
/* Drive the target state machine.  The loop keeps advancing as long as a
 * state transition completes synchronously; states that start asynchronous
 * work (poll group creation, subsystem start/stop, target destroy) leave
 * g_tgt_state unchanged so the loop exits, and the relevant completion
 * callback re-enters this function later.
 */
static void
nvmf_tgt_advance_state(void)
{
	enum nvmf_tgt_state prev_state;
	int rc = -1;	/* error code reported if we end in NVMF_TGT_ERROR */
	int ret;

	do {
		SPDK_DTRACE_PROBE1(nvmf_tgt_state, g_tgt_state);
		prev_state = g_tgt_state;

		switch (g_tgt_state) {
		case NVMF_TGT_INIT_NONE: {
			g_tgt_state = NVMF_TGT_INIT_CREATE_TARGET;
			break;
		}
		case NVMF_TGT_INIT_CREATE_TARGET:
			ret = nvmf_tgt_create_target();
			g_tgt_state = (ret == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
			break;
		case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
			/* Install the custom identify handler before any I/O can arrive. */
			if (g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr) {
				SPDK_NOTICELOG("Custom identify ctrlr handler enabled\n");
				spdk_nvmf_set_custom_admin_cmd_hdlr(SPDK_NVME_OPC_IDENTIFY, nvmf_custom_identify_hdlr);
			}
			/* Create poll group threads, and send a message to each thread
			 * and create a poll group.
			 */
			nvmf_tgt_create_poll_groups();
			break;
		case NVMF_TGT_INIT_START_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				/* Starts are chained via nvmf_tgt_subsystem_started(). */
				ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
				if (ret) {
					SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
					g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
				}
			} else {
				g_tgt_state = NVMF_TGT_RUNNING;
			}
			break;
		}
		case NVMF_TGT_RUNNING:
			/* Tell the event framework this subsystem finished initializing. */
			spdk_subsystem_init_next(0);
			break;
		case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
			struct spdk_nvmf_subsystem *subsystem;

			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				/* Stops are chained via nvmf_tgt_subsystem_stopped(). */
				ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
				if (ret) {
					nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
				}
			} else {
				g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
			}
			break;
		}
		case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
			/* Send a message to each poll group thread, and terminate the thread */
			nvmf_tgt_destroy_poll_groups();
			break;
		case NVMF_TGT_FINI_FREE_RESOURCES:
			spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
			break;
		case NVMF_TGT_STOPPED:
			spdk_subsystem_fini_next();
			return;
		case NVMF_TGT_ERROR:
			/* Report init failure (rc == -1) to the event framework. */
			spdk_subsystem_init_next(rc);
			return;
		}

	} while (g_tgt_state != prev_state);
}
481 
/* Event framework init hook: start the target state machine from scratch. */
static void
nvmf_subsystem_init(void)
{
	g_tgt_state = NVMF_TGT_INIT_NONE;
	nvmf_tgt_advance_state();
}
488 
/* Write the "discovery_filter" key as a comma separated flag string.  The
 * answers[] table is indexed directly by the filter bitmask, so it depends
 * on the TRANSPORT_TYPE/ADDRESS/SVCID match flags occupying bits 0, 1 and 2
 * - the guard below asserts no other bits are set.
 */
static void
nvmf_subsystem_dump_discover_filter(struct spdk_json_write_ctx *w)
{
	static char const *const answers[] = {
		"match_any",
		"transport",
		"address",
		"transport,address",
		"svcid",
		"transport,svcid",
		"address,svcid",
		"transport,address,svcid"
	};

	if ((g_spdk_nvmf_tgt_conf.discovery_filter & ~(SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_TYPE |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_ADDRESS |
			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_SVCID)) != 0) {
		SPDK_ERRLOG("Incorrect discovery filter %d\n", g_spdk_nvmf_tgt_conf.discovery_filter);
		assert(0);
		return;
	}

	spdk_json_write_named_string(w, "discovery_filter", answers[g_spdk_nvmf_tgt_conf.discovery_filter]);
}
513 
/* Event framework config-dump hook: emit an "nvmf_set_config" RPC entry
 * reproducing this module's settings, then delegate to the nvmf library for
 * the target's own configuration.  Note the JSON begin/end calls must stay
 * strictly paired and in this order.
 */
static void
nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_array_begin(w);

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "nvmf_set_config");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf.acceptor_poll_rate);
	nvmf_subsystem_dump_discover_filter(w);
	spdk_json_write_named_object_begin(w, "admin_cmd_passthru");
	spdk_json_write_named_bool(w, "identify_ctrlr",
				   g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr);
	spdk_json_write_object_end(w);
	/* Only persist the cpumask when one was explicitly configured. */
	if (g_poll_groups_mask) {
		spdk_json_write_named_string(w, "poll_groups_mask", spdk_cpuset_fmt(g_poll_groups_mask));
	}
	spdk_json_write_object_end(w);
	spdk_json_write_object_end(w);

	spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
	spdk_json_write_array_end(w);
}
538 
/* Register "nvmf" with the event framework; the DEPEND declarations ensure
 * it initializes after (and finishes before) the bdev and sock subsystems.
 */
static struct spdk_subsystem g_spdk_subsystem_nvmf = {
	.name = "nvmf",
	.init = nvmf_subsystem_init,
	.fini = nvmf_subsystem_fini,
	.write_config_json = nvmf_subsystem_write_config_json,
};

SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)
SPDK_SUBSYSTEM_DEPEND(nvmf, sock)
549