xref: /spdk/module/event/subsystems/nvmf/nvmf_tgt.c (revision 1078198e78653b2f39414c1566740018d76ee73d)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2018 Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "event_nvmf.h"
8 
9 #include "spdk/bdev.h"
10 #include "spdk/thread.h"
11 #include "spdk/log.h"
12 #include "spdk/nvme.h"
13 #include "spdk/nvmf_cmd.h"
14 #include "spdk_internal/usdt.h"
15 
/*
 * States of the NVMe-oF target subsystem state machine driven by
 * nvmf_tgt_advance_state().
 *
 * The numeric order matters: nvmf_shutdown_cb() treats every value below
 * NVMF_TGT_RUNNING as "still initializing".
 */
enum nvmf_tgt_state {
	/* Starting point, set by nvmf_subsystem_init(). */
	NVMF_TGT_INIT_NONE = 0,
	/* Create the target object and its discovery subsystem. */
	NVMF_TGT_INIT_CREATE_TARGET,
	/* Spawn the poll group threads and create a poll group on each. */
	NVMF_TGT_INIT_CREATE_POLL_GROUPS,
	/* Start the subsystems one after another. */
	NVMF_TGT_INIT_START_SUBSYSTEMS,
	/* Initialization finished; success is reported to the subsystem framework. */
	NVMF_TGT_RUNNING,
	/* Stop the subsystems one after another. */
	NVMF_TGT_FINI_STOP_SUBSYSTEMS,
	/* Destroy all subsystems. */
	NVMF_TGT_FINI_DESTROY_SUBSYSTEMS,
	/* Destroy every poll group and exit its thread. */
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
	/* Destroy the target object. */
	NVMF_TGT_FINI_DESTROY_TARGET,
	/* Teardown finished; the subsystem framework is told to continue. */
	NVMF_TGT_STOPPED,
	/* Initialization failed; the failure is reported to the subsystem framework. */
	NVMF_TGT_ERROR,
};
29 
/* Book-keeping for one poll group and the SPDK thread it was created on. */
struct nvmf_tgt_poll_group {
	/* Poll group returned by spdk_nvmf_poll_group_create(); NULL if creation failed. */
	struct spdk_nvmf_poll_group		*group;
	/* Thread that created the poll group; destruction is sent back to it. */
	struct spdk_thread			*thread;
	/* Linkage in g_poll_groups. */
	TAILQ_ENTRY(nvmf_tgt_poll_group)	link;
};
35 
36 struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf = {
37 	.admin_passthru.identify_ctrlr = false
38 };
39 
40 struct spdk_cpuset *g_poll_groups_mask = NULL;
41 struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
42 uint32_t g_spdk_nvmf_tgt_max_subsystems = 0;
43 uint16_t g_spdk_nvmf_tgt_crdt[3] = {0, 0, 0};
44 
45 static enum nvmf_tgt_state g_tgt_state;
46 
47 static struct spdk_thread *g_tgt_init_thread = NULL;
48 static struct spdk_thread *g_tgt_fini_thread = NULL;
49 
50 static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
51 static size_t g_num_poll_groups = 0;
52 
53 static void nvmf_tgt_advance_state(void);
54 
55 static void
56 nvmf_shutdown_cb(void *arg1)
57 {
58 	/* Still in initialization state, defer shutdown operation */
59 	if (g_tgt_state < NVMF_TGT_RUNNING) {
60 		spdk_thread_send_msg(spdk_get_thread(), nvmf_shutdown_cb, NULL);
61 		return;
62 	} else if (g_tgt_state != NVMF_TGT_RUNNING && g_tgt_state != NVMF_TGT_ERROR) {
63 		/* Already in Shutdown status, ignore the signal */
64 		return;
65 	}
66 
67 	if (g_tgt_state == NVMF_TGT_ERROR) {
68 		/* Parse configuration error */
69 		g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
70 	} else {
71 		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
72 	}
73 	nvmf_tgt_advance_state();
74 }
75 
/* .fini hook of the "nvmf" subsystem: kicks off the target shutdown sequence. */
static void
nvmf_subsystem_fini(void)
{
	nvmf_shutdown_cb(NULL);
}
81 
82 static void
83 _nvmf_tgt_destroy_poll_group_done(void *ctx)
84 {
85 	assert(g_num_poll_groups > 0);
86 
87 	if (--g_num_poll_groups == 0) {
88 		g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
89 		nvmf_tgt_advance_state();
90 	}
91 }
92 
93 static void
94 nvmf_tgt_destroy_poll_group_done(void *cb_arg, int status)
95 {
96 	struct nvmf_tgt_poll_group *pg = cb_arg;
97 
98 	free(pg);
99 
100 	spdk_thread_send_msg(g_tgt_fini_thread, _nvmf_tgt_destroy_poll_group_done, NULL);
101 
102 	spdk_thread_exit(spdk_get_thread());
103 }
104 
105 static void
106 nvmf_tgt_destroy_poll_group(void *ctx)
107 {
108 	struct nvmf_tgt_poll_group *pg = ctx;
109 
110 	spdk_nvmf_poll_group_destroy(pg->group, nvmf_tgt_destroy_poll_group_done, pg);
111 }
112 
113 static void
114 nvmf_tgt_destroy_poll_groups(void)
115 {
116 	struct nvmf_tgt_poll_group *pg, *tpg;
117 
118 	g_tgt_fini_thread = spdk_get_thread();
119 	assert(g_tgt_fini_thread != NULL);
120 
121 	TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
122 		TAILQ_REMOVE(&g_poll_groups, pg, link);
123 		spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg);
124 	}
125 }
126 
127 static uint32_t
128 nvmf_get_cpuset_count(void)
129 {
130 	if (g_poll_groups_mask) {
131 		return spdk_cpuset_count(g_poll_groups_mask);
132 	} else {
133 		return spdk_env_get_core_count();
134 	}
135 }
136 
137 static void
138 nvmf_tgt_create_poll_group_done(void *ctx)
139 {
140 	struct nvmf_tgt_poll_group *pg = ctx;
141 
142 	assert(pg);
143 
144 	if (!pg->group) {
145 		SPDK_ERRLOG("Failed to create nvmf poll group\n");
146 		/* Change the state to error but wait for completions from all other threads */
147 		g_tgt_state = NVMF_TGT_ERROR;
148 	}
149 
150 	TAILQ_INSERT_TAIL(&g_poll_groups, pg, link);
151 
152 	assert(g_num_poll_groups < nvmf_get_cpuset_count());
153 
154 	if (++g_num_poll_groups == nvmf_get_cpuset_count()) {
155 		if (g_tgt_state != NVMF_TGT_ERROR) {
156 			g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
157 		}
158 		nvmf_tgt_advance_state();
159 	}
160 }
161 
162 static void
163 nvmf_tgt_create_poll_group(void *ctx)
164 {
165 	struct nvmf_tgt_poll_group *pg;
166 
167 	pg = calloc(1, sizeof(*pg));
168 	if (!pg) {
169 		SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
170 		g_tgt_state = NVMF_TGT_ERROR;
171 		nvmf_tgt_advance_state();
172 		return;
173 	}
174 
175 	pg->thread = spdk_get_thread();
176 	pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);
177 
178 	spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_done, pg);
179 }
180 
181 static void
182 nvmf_tgt_create_poll_groups(void)
183 {
184 	uint32_t cpu, count = 0;
185 	char thread_name[32];
186 	struct spdk_thread *thread;
187 
188 	g_tgt_init_thread = spdk_get_thread();
189 	assert(g_tgt_init_thread != NULL);
190 
191 	SPDK_ENV_FOREACH_CORE(cpu) {
192 		if (g_poll_groups_mask && !spdk_cpuset_get_cpu(g_poll_groups_mask, cpu)) {
193 			continue;
194 		}
195 		snprintf(thread_name, sizeof(thread_name), "nvmf_tgt_poll_group_%u", count++);
196 
197 		thread = spdk_thread_create(thread_name, g_poll_groups_mask);
198 		assert(thread != NULL);
199 
200 		spdk_thread_send_msg(thread, nvmf_tgt_create_poll_group, NULL);
201 	}
202 }
203 
204 static void
205 nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
206 			   void *cb_arg, int status)
207 {
208 	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
209 	int rc;
210 
211 	if (subsystem) {
212 		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
213 		if (rc) {
214 			g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
215 			SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
216 			nvmf_tgt_advance_state();
217 		}
218 		return;
219 	}
220 
221 	g_tgt_state = NVMF_TGT_RUNNING;
222 	nvmf_tgt_advance_state();
223 }
224 
225 static void
226 nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
227 			   void *cb_arg, int status)
228 {
229 	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
230 	int rc;
231 
232 	if (subsystem) {
233 		rc = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
234 		if (rc) {
235 			SPDK_ERRLOG("Unable to stop NVMe-oF subsystem %s with rc %d, Trying others.\n",
236 				    spdk_nvmf_subsystem_get_nqn(subsystem), rc);
237 			nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
238 		}
239 		return;
240 	}
241 
242 	g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
243 	nvmf_tgt_advance_state();
244 }
245 
246 static void
247 _nvmf_tgt_subsystem_destroy(void *cb_arg)
248 {
249 	struct spdk_nvmf_subsystem *subsystem, *next_subsystem;
250 	int rc;
251 
252 	subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
253 
254 	while (subsystem != NULL) {
255 		next_subsystem = spdk_nvmf_subsystem_get_next(subsystem);
256 		rc = spdk_nvmf_subsystem_destroy(subsystem, _nvmf_tgt_subsystem_destroy, NULL);
257 		if (rc) {
258 			if (rc == -EINPROGRESS) {
259 				/* If ret is -EINPROGRESS, nvmf_tgt_subsystem_destroyed will be called when subsystem
260 				 * is destroyed, _nvmf_tgt_subsystem_destroy will continue to destroy other subsystems if any */
261 				return;
262 			} else {
263 				SPDK_ERRLOG("Unable to destroy subsystem %s, rc %d. Trying others.\n",
264 					    spdk_nvmf_subsystem_get_nqn(subsystem), rc);
265 			}
266 		}
267 		subsystem = next_subsystem;
268 	}
269 
270 	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
271 	nvmf_tgt_advance_state();
272 }
273 
274 static void
275 nvmf_tgt_destroy_done(void *ctx, int status)
276 {
277 	g_tgt_state = NVMF_TGT_STOPPED;
278 
279 	nvmf_tgt_advance_state();
280 }
281 
282 static int
283 nvmf_add_discovery_subsystem(void)
284 {
285 	struct spdk_nvmf_subsystem *subsystem;
286 
287 	subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN,
288 					       SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT, 0);
289 	if (subsystem == NULL) {
290 		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
291 		return -1;
292 	}
293 
294 	spdk_nvmf_subsystem_set_allow_any_host(subsystem, true);
295 
296 	return 0;
297 }
298 
299 static int
300 nvmf_tgt_create_target(void)
301 {
302 	struct spdk_nvmf_target_opts opts = {
303 		.name = "nvmf_tgt"
304 	};
305 
306 	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
307 	opts.crdt[0] = g_spdk_nvmf_tgt_crdt[0];
308 	opts.crdt[1] = g_spdk_nvmf_tgt_crdt[1];
309 	opts.crdt[2] = g_spdk_nvmf_tgt_crdt[2];
310 	opts.discovery_filter = g_spdk_nvmf_tgt_conf.discovery_filter;
311 	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
312 	if (!g_spdk_nvmf_tgt) {
313 		SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n");
314 		return -1;
315 	}
316 
317 	if (nvmf_add_discovery_subsystem() != 0) {
318 		SPDK_ERRLOG("nvmf_add_discovery_subsystem failed\n");
319 		return -1;
320 	}
321 
322 	return 0;
323 }
324 
325 static void
326 fixup_identify_ctrlr(struct spdk_nvmf_request *req)
327 {
328 	struct spdk_nvme_ctrlr_data nvme_cdata = {};
329 	struct spdk_nvme_ctrlr_data nvmf_cdata = {};
330 	struct spdk_nvmf_ctrlr *ctrlr = spdk_nvmf_request_get_ctrlr(req);
331 	struct spdk_nvme_cpl *rsp = spdk_nvmf_request_get_response(req);
332 	size_t datalen;
333 	int rc;
334 
335 	/* This is the identify data from the NVMe drive */
336 	datalen = spdk_nvmf_request_copy_to_buf(req, &nvme_cdata,
337 						sizeof(nvme_cdata));
338 
339 	/* Get the NVMF identify data */
340 	rc = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, &nvmf_cdata);
341 	if (rc != SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
342 		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
343 		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
344 		return;
345 	}
346 
347 	/* Fixup NVMF identify data with NVMe identify data */
348 
349 	/* Serial Number (SN) */
350 	memcpy(&nvmf_cdata.sn[0], &nvme_cdata.sn[0], sizeof(nvmf_cdata.sn));
351 	/* Model Number (MN) */
352 	memcpy(&nvmf_cdata.mn[0], &nvme_cdata.mn[0], sizeof(nvmf_cdata.mn));
353 	/* Firmware Revision (FR) */
354 	memcpy(&nvmf_cdata.fr[0], &nvme_cdata.fr[0], sizeof(nvmf_cdata.fr));
355 	/* IEEE OUI Identifier (IEEE) */
356 	memcpy(&nvmf_cdata.ieee[0], &nvme_cdata.ieee[0], sizeof(nvmf_cdata.ieee));
357 	/* FRU Globally Unique Identifier (FGUID) */
358 
359 	/* Copy the fixed up data back to the response */
360 	spdk_nvmf_request_copy_from_buf(req, &nvmf_cdata, datalen);
361 }
362 
363 static int
364 nvmf_custom_identify_hdlr(struct spdk_nvmf_request *req)
365 {
366 	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
367 	struct spdk_bdev *bdev;
368 	struct spdk_bdev_desc *desc;
369 	struct spdk_io_channel *ch;
370 	struct spdk_nvmf_subsystem *subsys;
371 	int rc;
372 
373 	if (cmd->cdw10_bits.identify.cns != SPDK_NVME_IDENTIFY_CTRLR) {
374 		return -1; /* continue */
375 	}
376 
377 	subsys = spdk_nvmf_request_get_subsystem(req);
378 	if (subsys == NULL) {
379 		return -1;
380 	}
381 
382 	/* Only procss this request if it has exactly one namespace */
383 	if (spdk_nvmf_subsystem_get_max_nsid(subsys) != 1) {
384 		return -1;
385 	}
386 
387 	/* Forward to first namespace if it supports NVME admin commands */
388 	rc = spdk_nvmf_request_get_bdev(1, req, &bdev, &desc, &ch);
389 	if (rc) {
390 		/* No bdev found for this namespace. Continue. */
391 		return -1;
392 	}
393 
394 	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
395 		return -1;
396 	}
397 
398 	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, fixup_identify_ctrlr);
399 }
400 
401 static void
402 nvmf_tgt_advance_state(void)
403 {
404 	enum nvmf_tgt_state prev_state;
405 	int rc = -1;
406 	int ret;
407 
408 	do {
409 		SPDK_DTRACE_PROBE1(nvmf_tgt_state, g_tgt_state);
410 		prev_state = g_tgt_state;
411 
412 		switch (g_tgt_state) {
413 		case NVMF_TGT_INIT_NONE: {
414 			g_tgt_state = NVMF_TGT_INIT_CREATE_TARGET;
415 			break;
416 		}
417 		case NVMF_TGT_INIT_CREATE_TARGET:
418 			ret = nvmf_tgt_create_target();
419 			g_tgt_state = (ret == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
420 			break;
421 		case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
422 			if (g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr) {
423 				SPDK_NOTICELOG("Custom identify ctrlr handler enabled\n");
424 				spdk_nvmf_set_custom_admin_cmd_hdlr(SPDK_NVME_OPC_IDENTIFY, nvmf_custom_identify_hdlr);
425 			}
426 			/* Create poll group threads, and send a message to each thread
427 			 * and create a poll group.
428 			 */
429 			nvmf_tgt_create_poll_groups();
430 			break;
431 		case NVMF_TGT_INIT_START_SUBSYSTEMS: {
432 			struct spdk_nvmf_subsystem *subsystem;
433 
434 			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
435 
436 			if (subsystem) {
437 				ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
438 				if (ret) {
439 					SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
440 					g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
441 				}
442 			} else {
443 				g_tgt_state = NVMF_TGT_RUNNING;
444 			}
445 			break;
446 		}
447 		case NVMF_TGT_RUNNING:
448 			spdk_subsystem_init_next(0);
449 			break;
450 		case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
451 			struct spdk_nvmf_subsystem *subsystem;
452 
453 			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
454 
455 			if (subsystem) {
456 				ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
457 				if (ret) {
458 					nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
459 				}
460 			} else {
461 				g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
462 			}
463 			break;
464 		}
465 		case NVMF_TGT_FINI_DESTROY_SUBSYSTEMS:
466 			_nvmf_tgt_subsystem_destroy(NULL);
467 			/* Function above can be asynchronous, it will call nvmf_tgt_advance_state() once done.
468 			 * So just return here */
469 			return;
470 		case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
471 			/* Send a message to each poll group thread, and terminate the thread */
472 			nvmf_tgt_destroy_poll_groups();
473 			break;
474 		case NVMF_TGT_FINI_DESTROY_TARGET:
475 			spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
476 			break;
477 		case NVMF_TGT_STOPPED:
478 			spdk_subsystem_fini_next();
479 			return;
480 		case NVMF_TGT_ERROR:
481 			spdk_subsystem_init_next(rc);
482 			return;
483 		}
484 
485 	} while (g_tgt_state != prev_state);
486 }
487 
488 static void
489 nvmf_subsystem_init(void)
490 {
491 	g_tgt_state = NVMF_TGT_INIT_NONE;
492 	nvmf_tgt_advance_state();
493 }
494 
495 static void
496 nvmf_subsystem_dump_discover_filter(struct spdk_json_write_ctx *w)
497 {
498 	static char const *const answers[] = {
499 		"match_any",
500 		"transport",
501 		"address",
502 		"transport,address",
503 		"svcid",
504 		"transport,svcid",
505 		"address,svcid",
506 		"transport,address,svcid"
507 	};
508 
509 	if ((g_spdk_nvmf_tgt_conf.discovery_filter & ~(SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_TYPE |
510 			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_ADDRESS |
511 			SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_SVCID)) != 0) {
512 		SPDK_ERRLOG("Incorrect discovery filter %d\n", g_spdk_nvmf_tgt_conf.discovery_filter);
513 		assert(0);
514 		return;
515 	}
516 
517 	spdk_json_write_named_string(w, "discovery_filter", answers[g_spdk_nvmf_tgt_conf.discovery_filter]);
518 }
519 
520 static void
521 nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
522 {
523 	spdk_json_write_array_begin(w);
524 
525 	spdk_json_write_object_begin(w);
526 	spdk_json_write_named_string(w, "method", "nvmf_set_config");
527 
528 	spdk_json_write_named_object_begin(w, "params");
529 	nvmf_subsystem_dump_discover_filter(w);
530 	spdk_json_write_named_object_begin(w, "admin_cmd_passthru");
531 	spdk_json_write_named_bool(w, "identify_ctrlr",
532 				   g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr);
533 	spdk_json_write_object_end(w);
534 	if (g_poll_groups_mask) {
535 		spdk_json_write_named_string(w, "poll_groups_mask", spdk_cpuset_fmt(g_poll_groups_mask));
536 	}
537 	spdk_json_write_object_end(w);
538 	spdk_json_write_object_end(w);
539 
540 	spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
541 	spdk_json_write_array_end(w);
542 }
543 
544 static struct spdk_subsystem g_spdk_subsystem_nvmf = {
545 	.name = "nvmf",
546 	.init = nvmf_subsystem_init,
547 	.fini = nvmf_subsystem_fini,
548 	.write_config_json = nvmf_subsystem_write_config_json,
549 };
550 
551 SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
552 SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)
553 SPDK_SUBSYSTEM_DEPEND(nvmf, sock)
554