xref: /spdk/examples/nvme/hotplug/hotplug.c (revision 91fcde065a5883d85ca1034a9a1b254e1eadbcad)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2016 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/nvme.h"
9 #include "spdk/queue.h"
10 #include "spdk/string.h"
11 #include "spdk/util.h"
12 #include "spdk/log.h"
13 #include "spdk/rpc.h"
14 
/* Address handed to spdk_rpc_listen() when the tool waits for the perform_tests RPC. */
static const char *g_rpc_addr = "/var/tmp/spdk.sock";
16 
17 struct dev_ctx {
18 	TAILQ_ENTRY(dev_ctx)	tailq;
19 	bool			is_new;
20 	bool			is_removed;
21 	bool			is_draining;
22 	struct spdk_nvme_ctrlr	*ctrlr;
23 	struct spdk_nvme_ns	*ns;
24 	struct spdk_nvme_qpair	*qpair;
25 	uint32_t		io_size_blocks;
26 	uint64_t		size_in_ios;
27 	uint64_t		io_completed;
28 	uint64_t		prev_io_completed;
29 	uint64_t		current_queue_depth;
30 	uint64_t		offset_in_ios;
31 	char			name[1024];
32 };
33 
/* One in-flight (or about to be submitted) read and the buffer it targets. */
struct perf_task {
	/* Device the read is issued against. */
	struct dev_ctx *dev;
	/* Data buffer obtained from spdk_dma_zmalloc() in alloc_task(). */
	void *buf;
};
38 
39 static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs);
40 
41 static uint64_t g_tsc_rate;
42 
43 static uint32_t g_io_size_bytes = 4096;
44 static int g_queue_depth = 4;
45 static int g_time_in_sec;
46 static int g_expected_insert_times = -1;
47 static int g_expected_removal_times = -1;
48 static int g_insert_times;
49 static int g_removal_times;
50 static int g_shm_id = -1;
51 static const char *g_iova_mode = NULL;
52 static uint64_t g_timeout_in_us = SPDK_SEC_TO_USEC;
53 static struct spdk_nvme_detach_ctx *g_detach_ctx;
54 
55 static bool g_wait_for_rpc = false;
56 static bool g_rpc_received = false;
57 
58 static void task_complete(struct perf_task *task);
59 
60 static void timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
61 		       struct spdk_nvme_qpair *qpair, uint16_t cid);
62 
63 static void
64 register_dev(struct spdk_nvme_ctrlr *ctrlr)
65 {
66 	struct dev_ctx *dev;
67 	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
68 
69 	dev = calloc(1, sizeof(*dev));
70 	if (dev == NULL) {
71 		perror("dev_ctx malloc");
72 		exit(1);
73 	}
74 
75 	snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
76 
77 	dev->ctrlr = ctrlr;
78 	dev->is_new = true;
79 	dev->is_removed = false;
80 	dev->is_draining = false;
81 
82 	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout_in_us, g_timeout_in_us, timeout_cb,
83 			NULL);
84 
85 	dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1);
86 
87 	if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) {
88 		fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name);
89 		goto skip;
90 	}
91 
92 	if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes ||
93 	    spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) {
94 		fprintf(stderr, "Controller %s: Invalid "
95 			"ns size %" PRIu64 " / block size %u for I/O size %u\n",
96 			dev->name,
97 			spdk_nvme_ns_get_size(dev->ns),
98 			spdk_nvme_ns_get_sector_size(dev->ns),
99 			g_io_size_bytes);
100 		goto skip;
101 	}
102 
103 	dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes;
104 	dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns);
105 
106 	dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
107 	if (!dev->qpair) {
108 		fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
109 		goto skip;
110 	}
111 	g_insert_times++;
112 	TAILQ_INSERT_TAIL(&g_devs, dev, tailq);
113 	return;
114 
115 skip:
116 	free(dev);
117 }
118 
119 static void
120 unregister_dev(struct dev_ctx *dev)
121 {
122 	fprintf(stderr, "unregister_dev: %s\n", dev->name);
123 
124 	spdk_nvme_ctrlr_free_io_qpair(dev->qpair);
125 	spdk_nvme_detach_async(dev->ctrlr, &g_detach_ctx);
126 
127 	TAILQ_REMOVE(&g_devs, dev, tailq);
128 	free(dev);
129 }
130 
131 static struct perf_task *
132 alloc_task(struct dev_ctx *dev)
133 {
134 	struct perf_task *task;
135 
136 	task = calloc(1, sizeof(*task));
137 	if (task == NULL) {
138 		return NULL;
139 	}
140 
141 	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
142 	if (task->buf == NULL) {
143 		free(task);
144 		return NULL;
145 	}
146 
147 	task->dev = dev;
148 
149 	return task;
150 }
151 
152 static void
153 free_task(struct perf_task *task)
154 {
155 	spdk_dma_free(task->buf);
156 	free(task);
157 }
158 
159 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
160 
161 static void
162 submit_single_io(struct perf_task *task)
163 {
164 	struct dev_ctx		*dev = task->dev;
165 	uint64_t		offset_in_ios;
166 	int			rc;
167 
168 	offset_in_ios = dev->offset_in_ios++;
169 	if (dev->offset_in_ios == dev->size_in_ios) {
170 		dev->offset_in_ios = 0;
171 	}
172 
173 	rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf,
174 				   offset_in_ios * dev->io_size_blocks,
175 				   dev->io_size_blocks, io_complete, task, 0);
176 
177 	if (rc != 0) {
178 		fprintf(stderr, "starting I/O failed\n");
179 		free_task(task);
180 	} else {
181 		dev->current_queue_depth++;
182 	}
183 }
184 
185 static void
186 task_complete(struct perf_task *task)
187 {
188 	struct dev_ctx *dev;
189 
190 	dev = task->dev;
191 	dev->current_queue_depth--;
192 	dev->io_completed++;
193 
194 	/*
195 	 * is_draining indicates when time has expired for the test run
196 	 * and we are just waiting for the previously submitted I/O
197 	 * to complete.  In this case, do not submit a new I/O to replace
198 	 * the one just completed.
199 	 */
200 	if (!dev->is_draining && !dev->is_removed) {
201 		submit_single_io(task);
202 	} else {
203 		free_task(task);
204 	}
205 }
206 
/* Read completion callback; ctx is the perf_task passed at submission. The status is not inspected. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct perf_task *finished = ctx;

	task_complete(finished);
}
212 
213 static void
214 check_io(struct dev_ctx *dev)
215 {
216 	spdk_nvme_qpair_process_completions(dev->qpair, 0);
217 }
218 
/*
 * Allocate and submit queue_depth reads for the device.
 * Exits the process if a task cannot be allocated.
 */
static void
submit_io(struct dev_ctx *dev, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		struct perf_task *t = alloc_task(dev);

		if (t == NULL) {
			fprintf(stderr, "task allocation failed\n");
			exit(1);
		}

		submit_single_io(t);
	}
}
234 
235 static void
236 drain_io(struct dev_ctx *dev)
237 {
238 	dev->is_draining = true;
239 	while (dev->current_queue_depth > 0) {
240 		check_io(dev);
241 	}
242 }
243 
244 static void
245 print_stats(void)
246 {
247 	struct dev_ctx *dev;
248 
249 	TAILQ_FOREACH(dev, &g_devs, tailq) {
250 		fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n",
251 			dev->name,
252 			dev->io_completed,
253 			dev->io_completed - dev->prev_io_completed);
254 		dev->prev_io_completed = dev->io_completed;
255 	}
256 
257 	fprintf(stderr, "\n");
258 }
259 
260 static bool
261 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
262 	 struct spdk_nvme_ctrlr_opts *opts)
263 {
264 	fprintf(stderr, "Attaching to %s\n", trid->traddr);
265 
266 	return true;
267 }
268 
269 static void
270 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
271 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
272 {
273 	fprintf(stderr, "Attached to %s\n", trid->traddr);
274 
275 	register_dev(ctrlr);
276 }
277 
278 static void
279 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
280 {
281 	struct dev_ctx *dev;
282 
283 	TAILQ_FOREACH(dev, &g_devs, tailq) {
284 		if (dev->ctrlr == ctrlr) {
285 			/*
286 			 * Mark the device as removed, but don't detach yet.
287 			 *
288 			 * The I/O handling code will detach once it sees that
289 			 * is_removed is true and all outstanding I/O have been completed.
290 			 */
291 			dev->is_removed = true;
292 			fprintf(stderr, "Controller removed: %s\n", dev->name);
293 			return;
294 		}
295 	}
296 
297 	/*
298 	 * If we get here, this remove_cb is for a controller that we are not tracking
299 	 * in g_devs (for example, because we skipped it during register_dev),
300 	 * so immediately detach it.
301 	 */
302 	spdk_nvme_detach_async(ctrlr, &g_detach_ctx);
303 }
304 
305 static void
306 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
307 	   struct spdk_nvme_qpair *qpair, uint16_t cid)
308 {
309 	/* leave hotplug monitor loop, use the timeout_cb to monitor the hotplug */
310 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
311 		fprintf(stderr, "spdk_nvme_probe() failed\n");
312 	}
313 }
314 
315 static void
316 io_loop(void)
317 {
318 	struct dev_ctx *dev, *dev_tmp;
319 	uint64_t tsc_end;
320 	uint64_t next_stats_tsc;
321 	int rc;
322 
323 	if (g_time_in_sec > 0) {
324 		tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
325 	} else {
326 		/* User specified 0 seconds for timeout, which means no timeout.
327 		 * So just set tsc_end to UINT64_MAX which ensures the loop
328 		 * will never time out.
329 		 */
330 		tsc_end = UINT64_MAX;
331 	}
332 
333 	next_stats_tsc = spdk_get_ticks();
334 
335 	while (1) {
336 		uint64_t now;
337 
338 		/*
339 		 * Check for completed I/O for each controller. A new
340 		 * I/O will be submitted in the io_complete callback
341 		 * to replace each I/O that is completed.
342 		 */
343 		TAILQ_FOREACH(dev, &g_devs, tailq) {
344 			if (dev->is_new) {
345 				/* Submit initial I/O for this controller. */
346 				submit_io(dev, g_queue_depth);
347 				dev->is_new = false;
348 			}
349 
350 			check_io(dev);
351 		}
352 
353 		/*
354 		 * Check for hotplug events.
355 		 */
356 		if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
357 			fprintf(stderr, "spdk_nvme_probe() failed\n");
358 			break;
359 		}
360 
361 		/*
362 		 * Check for devices which were hot-removed and have finished
363 		 * processing outstanding I/Os.
364 		 *
365 		 * unregister_dev() may remove devs from the list, so use the
366 		 * removal-safe iterator.
367 		 */
368 		TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
369 			if (dev->is_removed && dev->current_queue_depth == 0) {
370 				g_removal_times++;
371 				unregister_dev(dev);
372 			}
373 		}
374 
375 		if (g_detach_ctx) {
376 			rc = spdk_nvme_detach_poll_async(g_detach_ctx);
377 			if (rc == 0) {
378 				g_detach_ctx = NULL;
379 			}
380 		}
381 
382 		if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) {
383 			break;
384 		}
385 
386 		now = spdk_get_ticks();
387 		if (now > tsc_end) {
388 			SPDK_ERRLOG("Timing out hotplug application!\n");
389 			break;
390 		}
391 		if (now > next_stats_tsc) {
392 			print_stats();
393 			next_stats_tsc += g_tsc_rate;
394 		}
395 	}
396 
397 	TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
398 		drain_io(dev);
399 		unregister_dev(dev);
400 	}
401 
402 	if (g_detach_ctx) {
403 		spdk_nvme_detach_poll(g_detach_ctx);
404 	}
405 }
406 
/*
 * Print the command-line help to stdout.
 *
 * program_name is printed as the first word of the synopsis line
 * (callers pass argv[0]).
 */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-c timeout for each command in second(default:1s)]\n");
	printf("\t[-i shm id (optional)]\n");
	printf("\t[-n expected hot insert times]\n");
	printf("\t[-r expected hot removal times]\n");
	printf("\t[-t time in seconds to wait for all events (default: forever)]\n");
	/* Closing bracket added so this entry matches the other option lines. */
	printf("\t[-m iova mode: pa or va (optional)]\n");
	printf("\t[-l log level]\n");
	printf("\t Available log levels:\n");
	printf("\t  disabled, error, warning, notice, info, debug\n");
	printf("\t[--wait-for-rpc wait for RPC perform_tests\n");
	printf("\t  to proceed with starting IO on NVMe disks]\n");
}
424 
/* Value getopt_long() returns for --wait-for-rpc; chosen outside the range of option characters. */
#define WAIT_FOR_RPC_OPT_IDX	257

/*
 * Long options accepted by parse_args().
 *
 * getopt_long() requires the table to end with an all-zero element; without
 * it, an unrecognised long option makes the scan run past the array.
 */
static const struct option g_wait_option[] = {
	{"wait-for-rpc", no_argument, NULL, WAIT_FOR_RPC_OPT_IDX},
	{NULL, 0, NULL, 0},
};
429 
430 static int
431 parse_args(int argc, char **argv)
432 {
433 	int op, opt_idx;
434 	long int val;
435 
436 	/* default value */
437 	g_time_in_sec = 0;
438 
439 	while ((op = getopt_long(argc, argv, "c:i:l:m:n:r:t:", g_wait_option, &opt_idx)) != -1) {
440 		if (op == '?') {
441 			usage(argv[0]);
442 			return 1;
443 		}
444 
445 		switch (op) {
446 		case WAIT_FOR_RPC_OPT_IDX:
447 			g_wait_for_rpc = true;
448 			break;
449 		case 'c':
450 		case 'i':
451 		case 'n':
452 		case 'r':
453 		case 't':
454 			val = spdk_strtol(optarg, 10);
455 			if (val < 0) {
456 				fprintf(stderr, "Converting a string to integer failed\n");
457 				return val;
458 			}
459 			switch (op) {
460 			case 'c':
461 				g_timeout_in_us = val * SPDK_SEC_TO_USEC;
462 				break;
463 			case 'i':
464 				g_shm_id = val;
465 				break;
466 			case 'n':
467 				g_expected_insert_times = val;
468 				break;
469 			case 'r':
470 				g_expected_removal_times = val;
471 				break;
472 			case 't':
473 				g_time_in_sec = val;
474 				break;
475 			}
476 			break;
477 		case 'm':
478 			g_iova_mode = optarg;
479 			break;
480 		case 'l':
481 			if (!strcmp(optarg, "disabled")) {
482 				spdk_log_set_print_level(SPDK_LOG_DISABLED);
483 			} else if (!strcmp(optarg, "error")) {
484 				spdk_log_set_print_level(SPDK_LOG_ERROR);
485 			} else if (!strcmp(optarg, "warning")) {
486 				spdk_log_set_print_level(SPDK_LOG_WARN);
487 			} else if (!strcmp(optarg, "notice")) {
488 				spdk_log_set_print_level(SPDK_LOG_NOTICE);
489 			} else if (!strcmp(optarg, "info")) {
490 				spdk_log_set_print_level(SPDK_LOG_INFO);
491 			} else if (!strcmp(optarg, "debug")) {
492 				spdk_log_set_print_level(SPDK_LOG_DEBUG);
493 			} else {
494 				fprintf(stderr, "Unrecognized log level: %s\n", optarg);
495 				return 1;
496 			}
497 			break;
498 		default:
499 			usage(argv[0]);
500 			return 1;
501 		}
502 	}
503 
504 	return 0;
505 }
506 
507 
508 static int
509 register_controllers(void)
510 {
511 	fprintf(stderr, "Initializing NVMe Controllers\n");
512 
513 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
514 		fprintf(stderr, "spdk_nvme_probe() failed\n");
515 		return 1;
516 	}
517 	/* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. */
518 	g_insert_times = 0;
519 	return 0;
520 }
521 
522 /* Hotplug RPC */
523 static void
524 rpc_perform_tests(struct spdk_jsonrpc_request *request,
525 		  const struct spdk_json_val *params)
526 {
527 	if (params) {
528 		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
529 						 "'perform_tests' requires no arguments");
530 		return;
531 	}
532 
533 	spdk_jsonrpc_send_bool_response(request, true);
534 
535 	g_rpc_received = true;
536 }
537 SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME);
538 
539 static void
540 wait_for_rpc_call(void)
541 {
542 	fprintf(stderr,
543 		"Listening for perform_tests to start the application...\n");
544 	spdk_rpc_listen(g_rpc_addr);
545 	spdk_rpc_set_state(SPDK_RPC_RUNTIME);
546 
547 	while (!g_rpc_received) {
548 		spdk_rpc_accept();
549 	}
550 	/* Run spdk_rpc_accept() one more time to trigger
551 	 * spdk_jsonrpv_server_poll() and send the RPC response. */
552 	spdk_rpc_accept();
553 }
554 
555 int
556 main(int argc, char **argv)
557 {
558 	int rc;
559 	struct spdk_env_opts opts;
560 
561 	rc = parse_args(argc, argv);
562 	if (rc != 0) {
563 		return rc;
564 	}
565 
566 	spdk_env_opts_init(&opts);
567 	opts.name = "hotplug";
568 	opts.core_mask = "0x1";
569 	if (g_shm_id > -1) {
570 		opts.shm_id = g_shm_id;
571 	}
572 	if (g_iova_mode) {
573 		opts.iova_mode = g_iova_mode;
574 	}
575 	if (spdk_env_init(&opts) < 0) {
576 		fprintf(stderr, "Unable to initialize SPDK env\n");
577 		return 1;
578 	}
579 
580 	g_tsc_rate = spdk_get_ticks_hz();
581 
582 	/* Detect the controllers that are plugged in at startup. */
583 	if (register_controllers() != 0) {
584 		rc = 1;
585 		goto cleanup;
586 	}
587 
588 	if (g_wait_for_rpc) {
589 		wait_for_rpc_call();
590 	}
591 
592 	fprintf(stderr, "Initialization complete. Starting I/O...\n");
593 	io_loop();
594 
595 	if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) {
596 		fprintf(stderr, "Expected inserts %d != actual inserts %d\n",
597 			g_expected_insert_times, g_insert_times);
598 		rc = 1;
599 		goto cleanup;
600 	}
601 
602 	if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) {
603 		fprintf(stderr, "Expected removals %d != actual removals %d\n",
604 			g_expected_removal_times, g_removal_times);
605 		rc = 1;
606 	}
607 
608 cleanup:
609 	spdk_rpc_close();
610 	spdk_env_fini();
611 	return rc;
612 }
613