xref: /spdk/examples/nvme/hotplug/hotplug.c (revision 8dd1cd2104ea4001e4a0da2a4851ccd62c82f8e8)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/nvme.h"
9 #include "spdk/queue.h"
10 #include "spdk/string.h"
11 #include "spdk/util.h"
12 #include "spdk/log.h"
13 
/*
 * Per-controller bookkeeping for this example. An instance is allocated in
 * register_dev() for each attached controller that passes validation and is
 * linked into the global g_devs list until unregister_dev() frees it.
 */
struct dev_ctx {
	/* Linkage into the g_devs list. */
	TAILQ_ENTRY(dev_ctx)	tailq;

	/* True from registration until io_loop() has submitted the initial I/Os. */
	bool			is_new;
	/* Set by remove_cb() when the controller has been hot-removed. */
	bool			is_removed;
	/* Set by drain_io(); completed I/Os are no longer resubmitted. */
	bool			is_draining;

	/* Controller handle plus the namespace and I/O queue pair used for reads. */
	struct spdk_nvme_ctrlr	*ctrlr;
	struct spdk_nvme_ns	*ns;
	struct spdk_nvme_qpair	*qpair;

	/* Size of one I/O, expressed in namespace sectors. */
	uint32_t		io_size_blocks;
	/* Namespace size, expressed in I/Os of g_io_size_bytes each. */
	uint64_t		size_in_ios;

	/* Total completed I/Os, and the total as of the last print_stats() call. */
	uint64_t		io_completed;
	uint64_t		prev_io_completed;

	/* Number of I/Os currently submitted and not yet completed. */
	uint64_t		current_queue_depth;
	/* Index (in I/O units) of the next read; wraps to 0 at size_in_ios. */
	uint64_t		offset_in_ios;

	/* Display name built from the controller data's mn and sn fields. */
	char			name[1024];
};
30 
/*
 * One in-flight read. A task is created by alloc_task(), resubmitted from
 * task_complete() while the device is active, and released by free_task().
 */
struct perf_task {
	/* Device this task issues I/O against. */
	struct dev_ctx		*dev;
	/* Data buffer for the read, obtained from spdk_dma_zmalloc(). */
	void			*buf;
};
35 
36 static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs);
37 
38 static uint64_t g_tsc_rate;
39 
40 static uint32_t g_io_size_bytes = 4096;
41 static int g_queue_depth = 4;
42 static int g_time_in_sec;
43 static int g_expected_insert_times = -1;
44 static int g_expected_removal_times = -1;
45 static int g_insert_times;
46 static int g_removal_times;
47 static int g_shm_id = -1;
48 static const char *g_iova_mode = NULL;
49 static uint64_t g_timeout_in_us = SPDK_SEC_TO_USEC;
50 static struct spdk_nvme_detach_ctx *g_detach_ctx;
51 
52 static void task_complete(struct perf_task *task);
53 
54 static void timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
55 		       struct spdk_nvme_qpair *qpair, uint16_t cid);
56 
57 static void
58 register_dev(struct spdk_nvme_ctrlr *ctrlr)
59 {
60 	struct dev_ctx *dev;
61 	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
62 
63 	dev = calloc(1, sizeof(*dev));
64 	if (dev == NULL) {
65 		perror("dev_ctx malloc");
66 		exit(1);
67 	}
68 
69 	snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
70 
71 	dev->ctrlr = ctrlr;
72 	dev->is_new = true;
73 	dev->is_removed = false;
74 	dev->is_draining = false;
75 
76 	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout_in_us, g_timeout_in_us, timeout_cb,
77 			NULL);
78 
79 	dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1);
80 
81 	if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) {
82 		fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name);
83 		goto skip;
84 	}
85 
86 	if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes ||
87 	    spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) {
88 		fprintf(stderr, "Controller %s: Invalid "
89 			"ns size %" PRIu64 " / block size %u for I/O size %u\n",
90 			dev->name,
91 			spdk_nvme_ns_get_size(dev->ns),
92 			spdk_nvme_ns_get_sector_size(dev->ns),
93 			g_io_size_bytes);
94 		goto skip;
95 	}
96 
97 	dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes;
98 	dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns);
99 
100 	dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
101 	if (!dev->qpair) {
102 		fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
103 		goto skip;
104 	}
105 	g_insert_times++;
106 	TAILQ_INSERT_TAIL(&g_devs, dev, tailq);
107 	return;
108 
109 skip:
110 	free(dev);
111 }
112 
113 static void
114 unregister_dev(struct dev_ctx *dev)
115 {
116 	fprintf(stderr, "unregister_dev: %s\n", dev->name);
117 
118 	spdk_nvme_ctrlr_free_io_qpair(dev->qpair);
119 	spdk_nvme_detach_async(dev->ctrlr, &g_detach_ctx);
120 
121 	TAILQ_REMOVE(&g_devs, dev, tailq);
122 	free(dev);
123 }
124 
125 static struct perf_task *
126 alloc_task(struct dev_ctx *dev)
127 {
128 	struct perf_task *task;
129 
130 	task = calloc(1, sizeof(*task));
131 	if (task == NULL) {
132 		return NULL;
133 	}
134 
135 	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
136 	if (task->buf == NULL) {
137 		free(task);
138 		return NULL;
139 	}
140 
141 	task->dev = dev;
142 
143 	return task;
144 }
145 
146 static void
147 free_task(struct perf_task *task)
148 {
149 	spdk_dma_free(task->buf);
150 	free(task);
151 }
152 
153 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
154 
155 static void
156 submit_single_io(struct perf_task *task)
157 {
158 	struct dev_ctx		*dev = task->dev;
159 	uint64_t		offset_in_ios;
160 	int			rc;
161 
162 	offset_in_ios = dev->offset_in_ios++;
163 	if (dev->offset_in_ios == dev->size_in_ios) {
164 		dev->offset_in_ios = 0;
165 	}
166 
167 	rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf,
168 				   offset_in_ios * dev->io_size_blocks,
169 				   dev->io_size_blocks, io_complete, task, 0);
170 
171 	if (rc != 0) {
172 		fprintf(stderr, "starting I/O failed\n");
173 		free_task(task);
174 	} else {
175 		dev->current_queue_depth++;
176 	}
177 }
178 
179 static void
180 task_complete(struct perf_task *task)
181 {
182 	struct dev_ctx *dev;
183 
184 	dev = task->dev;
185 	dev->current_queue_depth--;
186 	dev->io_completed++;
187 
188 	/*
189 	 * is_draining indicates when time has expired for the test run
190 	 * and we are just waiting for the previously submitted I/O
191 	 * to complete.  In this case, do not submit a new I/O to replace
192 	 * the one just completed.
193 	 */
194 	if (!dev->is_draining && !dev->is_removed) {
195 		submit_single_io(task);
196 	} else {
197 		free_task(task);
198 	}
199 }
200 
/*
 * Completion callback passed to spdk_nvme_ns_cmd_read(). ctx is the
 * perf_task that was submitted; the completion entry is not inspected.
 */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct perf_task *finished = ctx;

	(void)completion;
	task_complete(finished);
}
206 
207 static void
208 check_io(struct dev_ctx *dev)
209 {
210 	spdk_nvme_qpair_process_completions(dev->qpair, 0);
211 }
212 
/*
 * Allocate and submit queue_depth tasks for the device. Nothing is
 * submitted when queue_depth is zero or negative. The process exits if a
 * task cannot be allocated.
 */
static void
submit_io(struct dev_ctx *dev, int queue_depth)
{
	int remaining;

	for (remaining = queue_depth; remaining > 0; remaining--) {
		struct perf_task *new_task = alloc_task(dev);

		if (new_task == NULL) {
			fprintf(stderr, "task allocation failed\n");
			exit(1);
		}

		submit_single_io(new_task);
	}
}
228 
229 static void
230 drain_io(struct dev_ctx *dev)
231 {
232 	dev->is_draining = true;
233 	while (dev->current_queue_depth > 0) {
234 		check_io(dev);
235 	}
236 }
237 
238 static void
239 print_stats(void)
240 {
241 	struct dev_ctx *dev;
242 
243 	TAILQ_FOREACH(dev, &g_devs, tailq) {
244 		fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n",
245 			dev->name,
246 			dev->io_completed,
247 			dev->io_completed - dev->prev_io_completed);
248 		dev->prev_io_completed = dev->io_completed;
249 	}
250 
251 	fprintf(stderr, "\n");
252 }
253 
254 static bool
255 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
256 	 struct spdk_nvme_ctrlr_opts *opts)
257 {
258 	fprintf(stderr, "Attaching to %s\n", trid->traddr);
259 
260 	return true;
261 }
262 
263 static void
264 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
265 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
266 {
267 	fprintf(stderr, "Attached to %s\n", trid->traddr);
268 
269 	register_dev(ctrlr);
270 }
271 
272 static void
273 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
274 {
275 	struct dev_ctx *dev;
276 
277 	TAILQ_FOREACH(dev, &g_devs, tailq) {
278 		if (dev->ctrlr == ctrlr) {
279 			/*
280 			 * Mark the device as removed, but don't detach yet.
281 			 *
282 			 * The I/O handling code will detach once it sees that
283 			 * is_removed is true and all outstanding I/O have been completed.
284 			 */
285 			dev->is_removed = true;
286 			fprintf(stderr, "Controller removed: %s\n", dev->name);
287 			return;
288 		}
289 	}
290 
291 	/*
292 	 * If we get here, this remove_cb is for a controller that we are not tracking
293 	 * in g_devs (for example, because we skipped it during register_dev),
294 	 * so immediately detach it.
295 	 */
296 	spdk_nvme_detach_async(ctrlr, &g_detach_ctx);
297 }
298 
299 static void
300 timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
301 	   struct spdk_nvme_qpair *qpair, uint16_t cid)
302 {
303 	/* leave hotplug monitor loop, use the timeout_cb to monitor the hotplug */
304 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
305 		fprintf(stderr, "spdk_nvme_probe() failed\n");
306 	}
307 }
308 
309 static void
310 io_loop(void)
311 {
312 	struct dev_ctx *dev, *dev_tmp;
313 	uint64_t tsc_end;
314 	uint64_t next_stats_tsc;
315 	int rc;
316 
317 	tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
318 	next_stats_tsc = spdk_get_ticks();
319 
320 	while (1) {
321 		uint64_t now;
322 
323 		/*
324 		 * Check for completed I/O for each controller. A new
325 		 * I/O will be submitted in the io_complete callback
326 		 * to replace each I/O that is completed.
327 		 */
328 		TAILQ_FOREACH(dev, &g_devs, tailq) {
329 			if (dev->is_new) {
330 				/* Submit initial I/O for this controller. */
331 				submit_io(dev, g_queue_depth);
332 				dev->is_new = false;
333 			}
334 
335 			check_io(dev);
336 		}
337 
338 		/*
339 		 * Check for hotplug events.
340 		 */
341 		if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
342 			fprintf(stderr, "spdk_nvme_probe() failed\n");
343 			break;
344 		}
345 
346 		/*
347 		 * Check for devices which were hot-removed and have finished
348 		 * processing outstanding I/Os.
349 		 *
350 		 * unregister_dev() may remove devs from the list, so use the
351 		 * removal-safe iterator.
352 		 */
353 		TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
354 			if (dev->is_removed && dev->current_queue_depth == 0) {
355 				g_removal_times++;
356 				unregister_dev(dev);
357 			}
358 		}
359 
360 		if (g_detach_ctx) {
361 			rc = spdk_nvme_detach_poll_async(g_detach_ctx);
362 			if (rc == 0) {
363 				g_detach_ctx = NULL;
364 			}
365 		}
366 
367 		now = spdk_get_ticks();
368 		if (now > tsc_end) {
369 			break;
370 		}
371 		if (now > next_stats_tsc) {
372 			print_stats();
373 			next_stats_tsc += g_tsc_rate;
374 		}
375 
376 		if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) {
377 			break;
378 		}
379 	}
380 
381 	TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
382 		drain_io(dev);
383 		unregister_dev(dev);
384 	}
385 
386 	if (g_detach_ctx) {
387 		spdk_nvme_detach_poll(g_detach_ctx);
388 	}
389 }
390 
/*
 * Print the command-line synopsis to stdout.
 *
 * program_name is printed as the first word of the synopsis (callers pass
 * argv[0]).
 */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-c timeout for each command in second(default:1s)]\n");
	printf("\t[-i shm id (optional)]\n");
	printf("\t[-n expected hot insert times]\n");
	printf("\t[-r expected hot removal times]\n");
	printf("\t[-t time in seconds]\n");
	/* The closing bracket was missing from this line. */
	printf("\t[-m iova mode: pa or va (optional)]\n");
	printf("\t[-l log level]\n");
	printf("\t Available log levels:\n");
	printf("\t  disabled, error, warning, notice, info, debug\n");
}
406 
407 static int
408 parse_args(int argc, char **argv)
409 {
410 	int op;
411 	long int val;
412 
413 	/* default value */
414 	g_time_in_sec = 0;
415 
416 	while ((op = getopt(argc, argv, "c:i:l:m:n:r:t:")) != -1) {
417 		if (op == '?') {
418 			usage(argv[0]);
419 			return 1;
420 		}
421 
422 		switch (op) {
423 		case 'c':
424 		case 'i':
425 		case 'n':
426 		case 'r':
427 		case 't':
428 			val = spdk_strtol(optarg, 10);
429 			if (val < 0) {
430 				fprintf(stderr, "Converting a string to integer failed\n");
431 				return val;
432 			}
433 			switch (op) {
434 			case 'c':
435 				g_timeout_in_us = val * SPDK_SEC_TO_USEC;
436 				break;
437 			case 'i':
438 				g_shm_id = val;
439 				break;
440 			case 'n':
441 				g_expected_insert_times = val;
442 				break;
443 			case 'r':
444 				g_expected_removal_times = val;
445 				break;
446 			case 't':
447 				g_time_in_sec = val;
448 				break;
449 			}
450 			break;
451 		case 'm':
452 			g_iova_mode = optarg;
453 			break;
454 		case 'l':
455 			if (!strcmp(optarg, "disabled")) {
456 				spdk_log_set_print_level(SPDK_LOG_DISABLED);
457 			} else if (!strcmp(optarg, "error")) {
458 				spdk_log_set_print_level(SPDK_LOG_ERROR);
459 			} else if (!strcmp(optarg, "warning")) {
460 				spdk_log_set_print_level(SPDK_LOG_WARN);
461 			} else if (!strcmp(optarg, "notice")) {
462 				spdk_log_set_print_level(SPDK_LOG_NOTICE);
463 			} else if (!strcmp(optarg, "info")) {
464 				spdk_log_set_print_level(SPDK_LOG_INFO);
465 			} else if (!strcmp(optarg, "debug")) {
466 				spdk_log_set_print_level(SPDK_LOG_DEBUG);
467 			} else {
468 				fprintf(stderr, "Unrecognized log level: %s\n", optarg);
469 				return 1;
470 			}
471 			break;
472 		default:
473 			usage(argv[0]);
474 			return 1;
475 		}
476 	}
477 
478 	if (!g_time_in_sec) {
479 		usage(argv[0]);
480 		return 1;
481 	}
482 
483 	return 0;
484 }
485 
486 
487 static int
488 register_controllers(void)
489 {
490 	fprintf(stderr, "Initializing NVMe Controllers\n");
491 
492 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
493 		fprintf(stderr, "spdk_nvme_probe() failed\n");
494 		return 1;
495 	}
496 	/* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. */
497 	g_insert_times = 0;
498 	return 0;
499 }
500 
501 int
502 main(int argc, char **argv)
503 {
504 	int rc;
505 	struct spdk_env_opts opts;
506 
507 	rc = parse_args(argc, argv);
508 	if (rc != 0) {
509 		return rc;
510 	}
511 
512 	spdk_env_opts_init(&opts);
513 	opts.name = "hotplug";
514 	opts.core_mask = "0x1";
515 	if (g_shm_id > -1) {
516 		opts.shm_id = g_shm_id;
517 	}
518 	if (g_iova_mode) {
519 		opts.iova_mode = g_iova_mode;
520 	}
521 	if (spdk_env_init(&opts) < 0) {
522 		fprintf(stderr, "Unable to initialize SPDK env\n");
523 		return 1;
524 	}
525 
526 	g_tsc_rate = spdk_get_ticks_hz();
527 
528 	/* Detect the controllers that are plugged in at startup. */
529 	if (register_controllers() != 0) {
530 		rc = 1;
531 		goto cleanup;
532 	}
533 
534 	fprintf(stderr, "Initialization complete. Starting I/O...\n");
535 	io_loop();
536 
537 	if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) {
538 		fprintf(stderr, "Expected inserts %d != actual inserts %d\n",
539 			g_expected_insert_times, g_insert_times);
540 		rc = 1;
541 		goto cleanup;
542 	}
543 
544 	if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) {
545 		fprintf(stderr, "Expected removals %d != actual removals %d\n",
546 			g_expected_removal_times, g_removal_times);
547 		rc = 1;
548 	}
549 
550 cleanup:
551 	spdk_env_fini();
552 	return rc;
553 }
554