xref: /spdk/examples/nvme/hotplug/hotplug.c (revision 179ed697b3c461d100e675915d074be717b7b9cc)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include <rte_config.h>
37 #include <rte_mempool.h>
38 
39 #include "spdk/nvme.h"
40 #include "spdk/queue.h"
41 
/* Per-controller state tracked by the hotplug test. One entry per registered
 * controller, linked into the global g_devs list. */
struct dev_ctx {
	TAILQ_ENTRY(dev_ctx)	tailq;			/* link in g_devs */
	bool			is_new;			/* true until initial I/O is submitted in io_loop */
	bool			is_removed;		/* set by remove_cb on hot-remove; detach deferred */
	bool			is_draining;		/* set by drain_io; stop submitting replacement I/O */
	struct spdk_nvme_ctrlr	*ctrlr;
	struct spdk_nvme_ns	*ns;			/* namespace 1 of the controller */
	struct spdk_nvme_qpair	*qpair;			/* I/O queue pair used for all reads on this dev */
	uint32_t		io_size_blocks;		/* blocks per I/O (g_io_size_bytes / sector size) */
	uint64_t		size_in_ios;		/* namespace capacity in I/O-sized units */
	uint64_t		io_completed;		/* total completions observed */
	uint64_t		prev_io_completed;	/* snapshot taken at the last stats print */
	uint64_t		current_queue_depth;	/* I/Os currently outstanding on qpair */
	uint64_t		offset_in_ios;		/* sequential read cursor, wraps at size_in_ios */
	char			name[1024];		/* "model (serial)" display name */
};
58 
/* One in-flight I/O: the device it was submitted to and its data buffer. */
struct perf_task {
	struct dev_ctx		*dev;	/* owning device, set in submit_single_io */
	void			*buf;	/* DMA-safe buffer allocated once in task_ctor */
};
63 
/* Pool of perf_task objects; created in main(), each element initialized by task_ctor(). */
static struct rte_mempool *task_pool;

/* All controllers currently attached and registered for I/O. */
static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs);

static uint64_t g_tsc_rate;			/* timestamp-counter ticks per second */

static uint32_t g_io_size_bytes = 4096;		/* size of each read I/O */
static int g_queue_depth = 4;			/* target outstanding I/Os per device */
static int g_time_in_sec;			/* run time; set via -t (required) */
static int g_expected_insert_times = -1;	/* -n option; -1 means no expectation */
static int g_expected_removal_times = -1;	/* -r option; -1 means no expectation */
static int g_insert_times;			/* hotplug insertions observed after startup */
static int g_removal_times;			/* hotplug removals fully processed */
static int g_shm_id = -1;			/* -i option; forwarded to spdk_env_opts */

/* Forward declaration: task_complete is called from io_complete below. */
static void
task_complete(struct perf_task *task);
81 
82 static void
83 register_dev(struct spdk_nvme_ctrlr *ctrlr)
84 {
85 	struct dev_ctx *dev;
86 	const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
87 
88 	dev = calloc(1, sizeof(*dev));
89 	if (dev == NULL) {
90 		perror("dev_ctx malloc");
91 		exit(1);
92 	}
93 
94 	snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
95 
96 	dev->ctrlr = ctrlr;
97 	dev->is_new = true;
98 	dev->is_removed = false;
99 	dev->is_draining = false;
100 
101 	dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1);
102 
103 	if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) {
104 		fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name);
105 		goto skip;
106 	}
107 
108 	if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes ||
109 	    spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) {
110 		fprintf(stderr, "Controller %s: Invalid "
111 			"ns size %" PRIu64 " / block size %u for I/O size %u\n",
112 			dev->name,
113 			spdk_nvme_ns_get_size(dev->ns),
114 			spdk_nvme_ns_get_sector_size(dev->ns),
115 			g_io_size_bytes);
116 		goto skip;
117 	}
118 
119 	dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes;
120 	dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns);
121 
122 	dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
123 	if (!dev->qpair) {
124 		fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
125 		goto skip;
126 	}
127 	g_insert_times++;
128 	TAILQ_INSERT_TAIL(&g_devs, dev, tailq);
129 	return;
130 
131 skip:
132 	free(dev);
133 }
134 
135 static void
136 unregister_dev(struct dev_ctx *dev)
137 {
138 	fprintf(stderr, "unregister_dev: %s\n", dev->name);
139 
140 	spdk_nvme_ctrlr_free_io_qpair(dev->qpair);
141 	spdk_nvme_detach(dev->ctrlr);
142 
143 	TAILQ_REMOVE(&g_devs, dev, tailq);
144 	free(dev);
145 }
146 
147 static void task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
148 {
149 	struct perf_task *task = __task;
150 	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
151 	if (task->buf == NULL) {
152 		fprintf(stderr, "task->buf rte_malloc failed\n");
153 		exit(1);
154 	}
155 	memset(task->buf, id % 8, g_io_size_bytes);
156 }
157 
158 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
159 
160 static void
161 submit_single_io(struct dev_ctx *dev)
162 {
163 	struct perf_task	*task = NULL;
164 	uint64_t		offset_in_ios;
165 	int			rc;
166 
167 	if (rte_mempool_get(task_pool, (void **)&task) != 0) {
168 		fprintf(stderr, "task_pool rte_mempool_get failed\n");
169 		exit(1);
170 	}
171 
172 	task->dev = dev;
173 
174 	offset_in_ios = dev->offset_in_ios++;
175 	if (dev->offset_in_ios == dev->size_in_ios) {
176 		dev->offset_in_ios = 0;
177 	}
178 
179 	rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf,
180 				   offset_in_ios * dev->io_size_blocks,
181 				   dev->io_size_blocks, io_complete, task, 0);
182 
183 	if (rc != 0) {
184 		fprintf(stderr, "starting I/O failed\n");
185 		rte_mempool_put(task_pool, task);
186 	} else {
187 		dev->current_queue_depth++;
188 	}
189 }
190 
191 static void
192 task_complete(struct perf_task *task)
193 {
194 	struct dev_ctx *dev;
195 
196 	dev = task->dev;
197 	dev->current_queue_depth--;
198 	dev->io_completed++;
199 
200 	rte_mempool_put(task_pool, task);
201 
202 	/*
203 	 * is_draining indicates when time has expired for the test run
204 	 * and we are just waiting for the previously submitted I/O
205 	 * to complete.  In this case, do not submit a new I/O to replace
206 	 * the one just completed.
207 	 */
208 	if (!dev->is_draining && !dev->is_removed) {
209 		submit_single_io(dev);
210 	}
211 }
212 
/* NVMe completion callback: forward the task to task_complete(). */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	(void)completion;	/* completion status is not inspected */

	task_complete(ctx);
}
218 
219 static void
220 check_io(struct dev_ctx *dev)
221 {
222 	spdk_nvme_qpair_process_completions(dev->qpair, 0);
223 }
224 
/* Prime the device with queue_depth initial reads. */
static void
submit_io(struct dev_ctx *dev, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(dev);
	}
}
232 
233 static void
234 drain_io(struct dev_ctx *dev)
235 {
236 	dev->is_draining = true;
237 	while (dev->current_queue_depth > 0) {
238 		check_io(dev);
239 	}
240 }
241 
242 static void
243 print_stats(void)
244 {
245 	struct dev_ctx *dev;
246 
247 	TAILQ_FOREACH(dev, &g_devs, tailq) {
248 		fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n",
249 			dev->name,
250 			dev->io_completed,
251 			dev->io_completed - dev->prev_io_completed);
252 		dev->prev_io_completed = dev->io_completed;
253 	}
254 
255 	fprintf(stderr, "\n");
256 }
257 
258 static bool
259 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
260 	 struct spdk_nvme_ctrlr_opts *opts)
261 {
262 	fprintf(stderr, "Attaching to %s\n", trid->traddr);
263 
264 	return true;
265 }
266 
267 static void
268 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
269 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
270 {
271 	fprintf(stderr, "Attached to %s\n", trid->traddr);
272 
273 	register_dev(ctrlr);
274 }
275 
276 static void
277 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
278 {
279 	struct dev_ctx *dev;
280 
281 	TAILQ_FOREACH(dev, &g_devs, tailq) {
282 		if (dev->ctrlr == ctrlr) {
283 			/*
284 			 * Mark the device as removed, but don't detach yet.
285 			 *
286 			 * The I/O handling code will detach once it sees that
287 			 * is_removed is true and all outstanding I/O have been completed.
288 			 */
289 			dev->is_removed = true;
290 			fprintf(stderr, "Controller removed: %s\n", dev->name);
291 			return;
292 		}
293 	}
294 
295 	/*
296 	 * If we get here, this remove_cb is for a controller that we are not tracking
297 	 * in g_devs (for example, because we skipped it during register_dev),
298 	 * so immediately detach it.
299 	 */
300 	spdk_nvme_detach(ctrlr);
301 }
302 
/*
 * Main polling loop.  Each iteration: reap completions on every device
 * (submitting initial I/O for newly attached ones), re-probe for hotplug
 * events, tear down hot-removed devices whose I/O has drained, then check
 * the time limit, the once-per-second stats print, and the optional
 * expected insert/removal counts.  On exit, drains and unregisters every
 * remaining device.
 */
static void
io_loop(void)
{
	struct dev_ctx *dev, *dev_tmp;
	uint64_t tsc_end;
	uint64_t next_stats_tsc;

	/* Absolute TSC deadline for the run, and the next stats print time. */
	tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	next_stats_tsc = spdk_get_ticks();

	while (1) {
		uint64_t now;

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(dev, &g_devs, tailq) {
			if (dev->is_new) {
				/* Submit initial I/O for this controller. */
				submit_io(dev, g_queue_depth);
				dev->is_new = false;
			}

			check_io(dev);
		}

		/*
		 * Check for hotplug events.
		 */
		if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
			fprintf(stderr, "spdk_nvme_probe() failed\n");
			break;
		}

		/*
		 * Check for devices which were hot-removed and have finished
		 * processing outstanding I/Os.
		 *
		 * unregister_dev() may remove devs from the list, so use the
		 * removal-safe iterator.
		 */
		TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
			if (dev->is_removed && dev->current_queue_depth == 0) {
				g_removal_times++;
				unregister_dev(dev);
			}
		}

		now = spdk_get_ticks();
		if (now > tsc_end) {
			break;
		}
		if (now > next_stats_tsc) {
			print_stats();
			next_stats_tsc += g_tsc_rate;
		}

		/* Early exit once both expectations are met; with the default
		 * expectations of -1 this condition can never trigger, so the
		 * loop runs until the time limit. */
		if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) {
			break;
		}
	}

	/* Final teardown: wait for outstanding I/O, then detach everything. */
	TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) {
		drain_io(dev);
		unregister_dev(dev);
	}
}
372 
/* Print the command-line synopsis to stdout. */
static void usage(char *program_name)
{
	static const char *option_lines[] = {
		"\t[-i shm id (optional)]\n",
		"\t[-n expected hot insert times]\n",
		"\t[-r expected hot removal times]\n",
		"\t[-t time in seconds]\n",
	};
	size_t i;

	printf("%s options", program_name);
	printf("\n");
	for (i = 0; i < sizeof(option_lines) / sizeof(option_lines[0]); i++) {
		printf("%s", option_lines[i]);
	}
}
382 
383 static int
384 parse_args(int argc, char **argv)
385 {
386 	int op;
387 
388 	/* default value */
389 	g_time_in_sec = 0;
390 
391 	while ((op = getopt(argc, argv, "i:n:r:t:")) != -1) {
392 		switch (op) {
393 		case 'i':
394 			g_shm_id = atoi(optarg);
395 			break;
396 		case 'n':
397 			g_expected_insert_times = atoi(optarg);
398 			break;
399 		case 'r':
400 			g_expected_removal_times = atoi(optarg);
401 			break;
402 		case 't':
403 			g_time_in_sec = atoi(optarg);
404 			break;
405 		default:
406 			usage(argv[0]);
407 			return 1;
408 		}
409 	}
410 
411 	if (!g_time_in_sec) {
412 		usage(argv[0]);
413 		return 1;
414 	}
415 
416 	return 0;
417 }
418 
419 
420 static int
421 register_controllers(void)
422 {
423 	fprintf(stderr, "Initializing NVMe Controllers\n");
424 
425 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) {
426 		fprintf(stderr, "spdk_nvme_probe() failed\n");
427 		return 1;
428 	}
429 	/* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. */
430 	g_insert_times = 0;
431 	return 0;
432 }
433 
434 int main(int argc, char **argv)
435 {
436 	int rc;
437 	struct spdk_env_opts opts;
438 
439 	rc = parse_args(argc, argv);
440 	if (rc != 0) {
441 		return rc;
442 	}
443 
444 	spdk_env_opts_init(&opts);
445 	opts.name = "hotplug";
446 	opts.core_mask = "0x1";
447 	if (g_shm_id > -1) {
448 		opts.shm_id = g_shm_id;
449 	}
450 	spdk_env_init(&opts);
451 
452 	task_pool = rte_mempool_create("task_pool", 8192,
453 				       sizeof(struct perf_task),
454 				       64, 0, NULL, NULL, task_ctor, NULL,
455 				       SOCKET_ID_ANY, 0);
456 
457 	g_tsc_rate = spdk_get_ticks_hz();
458 
459 	/* Detect the controllers that are plugged in at startup. */
460 	if (register_controllers() != 0) {
461 		return 1;
462 	}
463 
464 	fprintf(stderr, "Initialization complete. Starting I/O...\n");
465 	io_loop();
466 
467 	if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) {
468 		fprintf(stderr, "Expected inserts %d != actual inserts %d\n",
469 			g_expected_insert_times, g_insert_times);
470 		return 1;
471 	}
472 
473 	if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) {
474 		fprintf(stderr, "Expected removals %d != actual removals %d\n",
475 			g_expected_removal_times, g_removal_times);
476 		return 1;
477 	}
478 
479 	return 0;
480 }
481