xref: /spdk/examples/nvme/hotplug/hotplug.c (revision fecffda6ecf8853b82edccde429b68252f0a62c5)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2016 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/nvme.h"
9 #include "spdk/queue.h"
10 #include "spdk/string.h"
11 #include "spdk/util.h"
12 #include "spdk/log.h"
13 #include "spdk/rpc.h"
14 
15 static const char *g_rpc_addr = "/var/tmp/spdk.sock";
16 
/* Per-controller state tracked for the duration of the test. */
struct dev_ctx {
	TAILQ_ENTRY(dev_ctx)	tailq;		/* Link in the global g_devs list. */
	bool			is_new;		/* Initial I/O not yet submitted. */
	bool			is_removed;	/* Hot-removal reported by remove_cb(). */
	bool			is_draining;	/* Test over; only waiting for in-flight I/O. */
	struct spdk_nvme_ctrlr	*ctrlr;
	struct spdk_nvme_ns	*ns;		/* Namespace 1 of the controller. */
	struct spdk_nvme_qpair	*qpair;		/* I/O queue pair used for all reads. */
	uint32_t		io_size_blocks;	/* One I/O, expressed in namespace blocks. */
	uint64_t		size_in_ios;	/* Namespace capacity in I/O-sized units. */
	uint64_t		io_completed;	/* Total I/Os completed on this device. */
	uint64_t		prev_io_completed;	/* io_completed at the last stats report. */
	uint64_t		current_queue_depth;	/* I/Os currently outstanding. */
	uint64_t		offset_in_ios;	/* Next sequential read offset (I/O units). */
	char			name[1024];	/* "model (serial)" display name. */
};
33 
/* One outstanding I/O: the owning device plus its DMA data buffer. */
struct perf_task {
	struct dev_ctx		*dev;
	void			*buf;	/* DMA-able read buffer of g_io_size_bytes. */
};
38 
/* All currently registered (attached and usable) devices. */
static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs);

static uint64_t g_tsc_rate;	/* Timestamp-counter frequency (ticks per second). */

static uint32_t g_io_size_bytes = 4096;	/* Size of each read I/O. */
static int g_queue_depth = 4;		/* Outstanding I/Os per device. */
static int g_time_in_sec;		/* -t: test duration (required). */
static int g_expected_insert_times = -1;	/* -n: -1 means "don't check". */
static int g_expected_removal_times = -1;	/* -r: -1 means "don't check". */
static int g_insert_times;		/* Hot-insert events observed. */
static int g_removal_times;		/* Hot-removal events observed. */
static int g_shm_id = -1;		/* -i: shared memory id for the SPDK env. */
static const char *g_iova_mode = NULL;	/* -m: "pa" or "va". */
static uint64_t g_timeout_in_us = SPDK_SEC_TO_USEC;	/* -c: per-command timeout. */
static struct spdk_nvme_detach_ctx *g_detach_ctx;	/* Shared async-detach context. */

static bool g_wait_for_rpc = false;	/* --wait-for-rpc was given. */
static bool g_rpc_received = false;	/* perform_tests RPC has arrived. */
57 
58 static void task_complete(struct perf_task *task);
59 
60 static void timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
61 		       struct spdk_nvme_qpair *qpair, uint16_t cid);
62 
/*
 * Set up a newly attached controller for I/O and add it to g_devs.
 * Controllers with no usable namespace 1, an incompatible block/ns size,
 * or a failed qpair allocation are skipped (their dev_ctx is freed; the
 * controller itself stays attached until remove_cb detaches it).
 */
static void
register_dev(struct spdk_nvme_ctrlr *ctrlr)
{
	struct dev_ctx *dev;
	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		perror("dev_ctx malloc");
		exit(1);
	}

	/* Display name: "model (serial)". */
	snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);

	dev->ctrlr = ctrlr;
	dev->is_new = true;
	dev->is_removed = false;
	dev->is_draining = false;

	/* timeout_cb doubles as the hotplug poll trigger (see timeout_cb). */
	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout_in_us, g_timeout_in_us, timeout_cb,
			NULL);

	/* This example only ever drives namespace 1. */
	dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1);

	if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) {
		fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name);
		goto skip;
	}

	/* The namespace must hold at least one I/O, in whole blocks. */
	if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes ||
	    spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) {
		fprintf(stderr, "Controller %s: Invalid "
			"ns size %" PRIu64 " / block size %u for I/O size %u\n",
			dev->name,
			spdk_nvme_ns_get_size(dev->ns),
			spdk_nvme_ns_get_sector_size(dev->ns),
			g_io_size_bytes);
		goto skip;
	}

	dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes;
	dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns);

	dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
	if (!dev->qpair) {
		fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
		goto skip;
	}
	g_insert_times++;
	TAILQ_INSERT_TAIL(&g_devs, dev, tailq);
	return;

skip:
	free(dev);
}
118 
/*
 * Remove a device from g_devs and begin detaching its controller.
 * The qpair is freed first; the detach completes asynchronously via
 * g_detach_ctx, which io_loop() polls.
 */
static void
unregister_dev(struct dev_ctx *dev)
{
	fprintf(stderr, "unregister_dev: %s\n", dev->name);

	spdk_nvme_ctrlr_free_io_qpair(dev->qpair);
	spdk_nvme_detach_async(dev->ctrlr, &g_detach_ctx);

	TAILQ_REMOVE(&g_devs, dev, tailq);
	free(dev);
}
130 
131 static struct perf_task *
132 alloc_task(struct dev_ctx *dev)
133 {
134 	struct perf_task *task;
135 
136 	task = calloc(1, sizeof(*task));
137 	if (task == NULL) {
138 		return NULL;
139 	}
140 
141 	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
142 	if (task->buf == NULL) {
143 		free(task);
144 		return NULL;
145 	}
146 
147 	task->dev = dev;
148 
149 	return task;
150 }
151 
/* Release a perf_task and its DMA buffer. */
static void
free_task(struct perf_task *task)
{
	spdk_dma_free(task->buf);
	free(task);
}
158 
159 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
160 
161 static void
162 submit_single_io(struct perf_task *task)
163 {
164 	struct dev_ctx		*dev = task->dev;
165 	uint64_t		offset_in_ios;
166 	int			rc;
167 
168 	offset_in_ios = dev->offset_in_ios++;
169 	if (dev->offset_in_ios == dev->size_in_ios) {
170 		dev->offset_in_ios = 0;
171 	}
172 
173 	rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf,
174 				   offset_in_ios * dev->io_size_blocks,
175 				   dev->io_size_blocks, io_complete, task, 0);
176 
177 	if (rc != 0) {
178 		fprintf(stderr, "starting I/O failed\n");
179 		free_task(task);
180 	} else {
181 		dev->current_queue_depth++;
182 	}
183 }
184 
185 static void
186 task_complete(struct perf_task *task)
187 {
188 	struct dev_ctx *dev;
189 
190 	dev = task->dev;
191 	dev->current_queue_depth--;
192 	dev->io_completed++;
193 
194 	/*
195 	 * is_draining indicates when time has expired for the test run
196 	 * and we are just waiting for the previously submitted I/O
197 	 * to complete.  In this case, do not submit a new I/O to replace
198 	 * the one just completed.
199 	 */
200 	if (!dev->is_draining && !dev->is_removed) {
201 		submit_single_io(task);
202 	} else {
203 		free_task(task);
204 	}
205 }
206 
/* NVMe completion callback: the context is the perf_task that submitted. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct perf_task *task = ctx;

	task_complete(task);
}
212 
/* Poll the device's qpair once for completed I/Os (0 = no completion limit). */
static void
check_io(struct dev_ctx *dev)
{
	spdk_nvme_qpair_process_completions(dev->qpair, 0);
}
218 
/*
 * Prime a device with queue_depth initial reads.  Task allocation
 * failure is fatal for this example tool.
 */
static void
submit_io(struct dev_ctx *dev, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		struct perf_task *task = alloc_task(dev);

		if (task == NULL) {
			fprintf(stderr, "task allocation failed\n");
			exit(1);
		}

		submit_single_io(task);
	}
}
234 
/* Stop resubmitting I/O and busy-poll until everything in flight completes. */
static void
drain_io(struct dev_ctx *dev)
{
	dev->is_draining = true;
	while (dev->current_queue_depth > 0) {
		check_io(dev);
	}
}
243 
244 static void
245 print_stats(void)
246 {
247 	struct dev_ctx *dev;
248 
249 	TAILQ_FOREACH(dev, &g_devs, tailq) {
250 		fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n",
251 			dev->name,
252 			dev->io_completed,
253 			dev->io_completed - dev->prev_io_completed);
254 		dev->prev_io_completed = dev->io_completed;
255 	}
256 
257 	fprintf(stderr, "\n");
258 }
259 
260 static bool
261 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
262 	 struct spdk_nvme_ctrlr_opts *opts)
263 {
264 	fprintf(stderr, "Attaching to %s\n", trid->traddr);
265 
266 	return true;
267 }
268 
269 static void
270 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
271 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
272 {
273 	fprintf(stderr, "Attached to %s\n", trid->traddr);
274 
275 	register_dev(ctrlr);
276 }
277 
278 static void
279 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
280 {
281 	struct dev_ctx *dev;
282 
283 	TAILQ_FOREACH(dev, &g_devs, tailq) {
284 		if (dev->ctrlr == ctrlr) {
285 			/*
286 			 * Mark the device as removed, but don't detach yet.
287 			 *
288 			 * The I/O handling code will detach once it sees that
289 			 * is_removed is true and all outstanding I/O have been completed.
290 			 */
291 			dev->is_removed = true;
292 			fprintf(stderr, "Controller removed: %s\n", dev->name);
293 			return;
294 		}
295 	}
296 
297 	/*
298 	 * If we get here, this remove_cb is for a controller that we are not tracking
299 	 * in g_devs (for example, because we skipped it during register_dev),
300 	 * so immediately detach it.
301 	 */
302 	spdk_nvme_detach_async(ctrlr, &g_detach_ctx);
303 }
304 
/*
 * Command-timeout callback.  A timed-out command is treated as a hint
 * that the device may have been hot-removed, so re-probe for hotplug
 * events here rather than running a dedicated monitor loop.
 */
static void
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
	   struct spdk_nvme_qpair *qpair, uint16_t cid)
{
	/* leave hotplug monitor loop, use the timeout_cb to monitor the hotplug */
	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
	}
}
314 
/*
 * Main polling loop: drive I/O on every registered device, re-probe for
 * hotplug events each pass, retire hot-removed devices once their I/O
 * drains, and print stats about once per second.  Runs until the time
 * limit (-t) expires or both -n/-r expectations are met, then drains
 * and unregisters everything.
 */
static void
io_loop(void)
{
	struct dev_ctx *dev, *dev_tmp;
	uint64_t tsc_end;
	uint64_t next_stats_tsc;
	int rc;

	tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	next_stats_tsc = spdk_get_ticks();

	while (1) {
		uint64_t now;

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(dev, &g_devs, tailq) {
			if (dev->is_new) {
				/* Submit initial I/O for this controller. */
				submit_io(dev, g_queue_depth);
				dev->is_new = false;
			}

			check_io(dev);
		}

		/*
		 * Check for hotplug events.
		 */
		if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
			fprintf(stderr, "spdk_nvme_probe() failed\n");
			break;
		}

		/*
		 * Check for devices which were hot-removed and have finished
		 * processing outstanding I/Os.
		 *
		 * unregister_dev() may remove devs from the list, so use the
		 * removal-safe iterator.
		 */
		TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
			if (dev->is_removed && dev->current_queue_depth == 0) {
				g_removal_times++;
				unregister_dev(dev);
			}
		}

		/* Advance any asynchronous detach started by unregister_dev()/remove_cb(). */
		if (g_detach_ctx) {
			rc = spdk_nvme_detach_poll_async(g_detach_ctx);
			if (rc == 0) {
				g_detach_ctx = NULL;
			}
		}

		now = spdk_get_ticks();
		if (now > tsc_end) {
			break;
		}
		if (now > next_stats_tsc) {
			print_stats();
			next_stats_tsc += g_tsc_rate;
		}

		/* Stop early once both expected hotplug counts have been reached. */
		if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) {
			break;
		}
	}

	/* Drain and tear down whatever is still attached. */
	TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
		drain_io(dev);
		unregister_dev(dev);
	}

	/* Block until the last async detach completes. */
	if (g_detach_ctx) {
		spdk_nvme_detach_poll(g_detach_ctx);
	}
}
396 
/* Print command-line help for this hotplug test tool. */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-c timeout for each command in second(default:1s)]\n");
	printf("\t[-i shm id (optional)]\n");
	printf("\t[-n expected hot insert times]\n");
	printf("\t[-r expected hot removal times]\n");
	printf("\t[-t time in seconds]\n");
	/* Fixed: this line was missing its closing ']', unlike every other option. */
	printf("\t[-m iova mode: pa or va (optional)]\n");
	printf("\t[-l log level]\n");
	printf("\t Available log levels:\n");
	printf("\t  disabled, error, warning, notice, info, debug\n");
	printf("\t[--wait-for-rpc wait for RPC perform_tests\n");
	printf("\t  to proceed with starting IO on NVMe disks]\n");
}
414 
/* Long-option table: only --wait-for-rpc, which has no short form. */
static const struct option g_wait_option[] = {
#define WAIT_FOR_RPC_OPT_IDX	257
	{"wait-for-rpc", no_argument, NULL, WAIT_FOR_RPC_OPT_IDX},
};
419 
/*
 * Parse command-line options into the g_* globals.
 * Returns 0 on success; non-zero on failure (1 after printing usage, or
 * the negative spdk_strtol error code for a bad integer argument).
 * -t (run time) is mandatory.
 */
static int
parse_args(int argc, char **argv)
{
	int op, opt_idx;
	long int val;

	/* default value */
	g_time_in_sec = 0;

	while ((op = getopt_long(argc, argv, "c:i:l:m:n:r:t:", g_wait_option, &opt_idx)) != -1) {
		if (op == '?') {
			usage(argv[0]);
			return 1;
		}

		switch (op) {
		case WAIT_FOR_RPC_OPT_IDX:
			g_wait_for_rpc = true;
			break;
		case 'c':
		case 'i':
		case 'n':
		case 'r':
		case 't':
			/* These options all take a non-negative integer argument. */
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to integer failed\n");
				return val;
			}
			switch (op) {
			case 'c':
				/* Convert seconds to microseconds for the timeout callback. */
				g_timeout_in_us = val * SPDK_SEC_TO_USEC;
				break;
			case 'i':
				g_shm_id = val;
				break;
			case 'n':
				g_expected_insert_times = val;
				break;
			case 'r':
				g_expected_removal_times = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			}
			break;
		case 'm':
			g_iova_mode = optarg;
			break;
		case 'l':
			if (!strcmp(optarg, "disabled")) {
				spdk_log_set_print_level(SPDK_LOG_DISABLED);
			} else if (!strcmp(optarg, "error")) {
				spdk_log_set_print_level(SPDK_LOG_ERROR);
			} else if (!strcmp(optarg, "warning")) {
				spdk_log_set_print_level(SPDK_LOG_WARN);
			} else if (!strcmp(optarg, "notice")) {
				spdk_log_set_print_level(SPDK_LOG_NOTICE);
			} else if (!strcmp(optarg, "info")) {
				spdk_log_set_print_level(SPDK_LOG_INFO);
			} else if (!strcmp(optarg, "debug")) {
				spdk_log_set_print_level(SPDK_LOG_DEBUG);
			} else {
				fprintf(stderr, "Unrecognized log level: %s\n", optarg);
				return 1;
			}
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

	/* The run time (-t) is required. */
	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	return 0;
}
501 
502 
/*
 * Probe and attach all NVMe controllers present at startup.
 * Returns 0 on success, 1 if the probe fails.
 */
static int
register_controllers(void)
{
	fprintf(stderr, "Initializing NVMe Controllers\n");

	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
		return 1;
	}
	/* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. */
	g_insert_times = 0;
	return 0;
}
516 
517 /* Hotplug RPC */
518 static void
519 rpc_perform_tests(struct spdk_jsonrpc_request *request,
520 		  const struct spdk_json_val *params)
521 {
522 	if (params) {
523 		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
524 						 "'perform_tests' requires no arguments");
525 		return;
526 	}
527 
528 	spdk_jsonrpc_send_bool_response(request, true);
529 
530 	g_rpc_received = true;
531 }
532 SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME);
533 
/* Block, polling the RPC socket, until the perform_tests RPC arrives. */
static void
wait_for_rpc_call(void)
{
	fprintf(stderr,
		"Listening for perform_tests to start the application...\n");
	spdk_rpc_listen(g_rpc_addr);
	spdk_rpc_set_state(SPDK_RPC_RUNTIME);

	/* rpc_perform_tests() sets g_rpc_received when the RPC is handled. */
	while (!g_rpc_received) {
		spdk_rpc_accept();
	}
	/* Run spdk_rpc_accept() one more time to trigger
	 * spdk_jsonrpc_server_poll() and send the RPC response. */
	spdk_rpc_accept();
}
549 
/*
 * Entry point: parse options, bring up a single-core SPDK environment,
 * attach the controllers present at startup, optionally wait for the
 * perform_tests RPC, run the I/O/hotplug loop, then check the observed
 * hotplug counts against the -n / -r expectations.
 */
int
main(int argc, char **argv)
{
	int rc;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "hotplug";
	opts.core_mask = "0x1";
	if (g_shm_id > -1) {
		opts.shm_id = g_shm_id;
	}
	if (g_iova_mode) {
		opts.iova_mode = g_iova_mode;
	}
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	/* Detect the controllers that are plugged in at startup. */
	if (register_controllers() != 0) {
		rc = 1;
		goto cleanup;
	}

	/* With --wait-for-rpc, hold off I/O until perform_tests arrives. */
	if (g_wait_for_rpc) {
		wait_for_rpc_call();
	}

	fprintf(stderr, "Initialization complete. Starting I/O...\n");
	io_loop();

	if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) {
		fprintf(stderr, "Expected inserts %d != actual inserts %d\n",
			g_expected_insert_times, g_insert_times);
		rc = 1;
		goto cleanup;
	}

	if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) {
		fprintf(stderr, "Expected removals %d != actual removals %d\n",
			g_expected_removal_times, g_removal_times);
		rc = 1;
	}

cleanup:
	spdk_env_fini();
	return rc;
}
607