xref: /spdk/test/nvme/reset/reset.c (revision ee32a82bfd3ff5b1a10ed775ee06f0eaffce60eb)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2015 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/nvme.h"
9 #include "spdk/env.h"
10 #include "spdk/string.h"
11 #include "spdk/pci_ids.h"
12 
/* Tracks one attached controller so it can be detached at shutdown. */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr		*ctrlr;
	TAILQ_ENTRY(ctrlr_entry)	link;	/* g_controllers linkage */
	char				name[1024];	/* NOTE(review): never written in this file */
};

/* One active namespace selected for I/O by this test. */
struct ns_entry {
	struct spdk_nvme_ns	*ns;
	struct spdk_nvme_ctrlr	*ctrlr;
	TAILQ_ENTRY(ns_entry)	link;		/* g_namespaces linkage */
	uint32_t		io_size_blocks;	/* one I/O expressed in logical blocks */
	uint64_t		size_in_ios;	/* namespace capacity in I/O-sized units */
	char			name[1024];	/* "model (serial)" label, see register_ns() */
};

/* Per-namespace I/O state owned by the worker thread. */
struct ns_worker_ctx {
	struct ns_entry			*entry;
	struct spdk_nvme_qpair		*qpair;			/* allocated in work_fn() */
	uint64_t			io_completed;		/* successful completions */
	uint64_t			io_completed_error;	/* completions with error status */
	uint64_t			io_submitted;		/* total successful submissions */
	uint64_t			current_queue_depth;	/* I/O currently in flight */
	uint64_t			offset_in_ios;		/* next offset for sequential workloads */
	bool				is_draining;		/* when true, completions are not replaced */

	TAILQ_ENTRY(ns_worker_ctx)	link;
};

/* One outstanding I/O: its DMA buffer plus the owning namespace context. */
struct reset_task {
	struct ns_worker_ctx	*ns_ctx;
	void			*buf;	/* spdk_zmalloc'd DMA buffer; freed on completion */
};

/* The single worker thread (this test runs on one core only). */
struct worker_thread {
	TAILQ_HEAD(, ns_worker_ctx)	ns_ctx;	/* namespaces assigned to this worker */
	unsigned			lcore;
};
50 
/* Pool of reset_task objects shared by all I/O submissions. */
static struct spdk_mempool *task_pool;

static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static int g_num_namespaces = 0;		/* length of g_namespaces */
static struct worker_thread *g_worker = NULL;	/* the single worker (core mask is 0x1) */
static bool g_qemu_ssd_found = false;		/* saw (and skipped) a QEMU-emulated SSD */

static uint64_t g_tsc_rate;			/* timestamp-counter ticks per second */

/* Command-line parameters; see parse_args() for defaults and validation. */
static int g_io_size_bytes;			/* -o: I/O size in bytes */
static int g_rw_percentage;			/* -M: percent of I/O that are reads */
static int g_is_random;				/* nonzero for rand* workloads */
static int g_queue_depth;			/* -q: target outstanding I/O per namespace */
static int g_time_in_sec;			/* -t: seconds per reset cycle */

#define  TASK_POOL_NUM 8192
68 
69 static void
70 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
71 {
72 	struct ns_entry *entry;
73 	const struct spdk_nvme_ctrlr_data *cdata;
74 
75 	if (!spdk_nvme_ns_is_active(ns)) {
76 		printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
77 		return;
78 	}
79 
80 	entry = malloc(sizeof(struct ns_entry));
81 	if (entry == NULL) {
82 		perror("ns_entry malloc");
83 		exit(1);
84 	}
85 
86 	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
87 
88 	entry->ns = ns;
89 	entry->ctrlr = ctrlr;
90 	entry->size_in_ios = spdk_nvme_ns_get_size(ns) /
91 			     g_io_size_bytes;
92 	entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
93 
94 	snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
95 
96 	g_num_namespaces++;
97 	TAILQ_INSERT_TAIL(&g_namespaces, entry, link);
98 }
99 
100 static void
101 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
102 {
103 	int nsid;
104 	struct spdk_nvme_ns *ns;
105 	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
106 
107 	if (entry == NULL) {
108 		perror("ctrlr_entry malloc");
109 		exit(1);
110 	}
111 
112 	entry->ctrlr = ctrlr;
113 	TAILQ_INSERT_TAIL(&g_controllers, entry, link);
114 
115 	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
116 	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
117 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
118 		if (ns == NULL) {
119 			continue;
120 		}
121 		register_ns(ctrlr, ns);
122 	}
123 }
124 
/* Completion callback; defined after task_complete() below. */
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

/* Per-thread rand_r() state; always seeded with 0, so runs are deterministic. */
static __thread unsigned int seed = 0;
128 
129 static void
130 submit_single_io(struct ns_worker_ctx *ns_ctx)
131 {
132 	struct reset_task	*task = NULL;
133 	uint64_t		offset_in_ios;
134 	int			rc;
135 	struct ns_entry		*entry = ns_ctx->entry;
136 
137 	task = spdk_mempool_get(task_pool);
138 	if (!task) {
139 		fprintf(stderr, "Failed to get task from task_pool\n");
140 		exit(1);
141 	}
142 
143 	task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
144 	if (!task->buf) {
145 		spdk_free(task->buf);
146 		fprintf(stderr, "task->buf spdk_zmalloc failed\n");
147 		exit(1);
148 	}
149 
150 	task->ns_ctx = ns_ctx;
151 
152 	if (g_is_random) {
153 		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
154 	} else {
155 		offset_in_ios = ns_ctx->offset_in_ios++;
156 		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
157 			ns_ctx->offset_in_ios = 0;
158 		}
159 	}
160 
161 	if ((g_rw_percentage == 100) ||
162 	    (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
163 		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
164 					   offset_in_ios * entry->io_size_blocks,
165 					   entry->io_size_blocks, io_complete, task, 0);
166 	} else {
167 		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
168 					    offset_in_ios * entry->io_size_blocks,
169 					    entry->io_size_blocks, io_complete, task, 0);
170 	}
171 
172 	if (rc != 0) {
173 		fprintf(stderr, "starting I/O failed\n");
174 	} else {
175 		ns_ctx->current_queue_depth++;
176 		ns_ctx->io_submitted++;
177 	}
178 }
179 
180 static void
181 task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
182 {
183 	struct ns_worker_ctx	*ns_ctx;
184 
185 	ns_ctx = task->ns_ctx;
186 	ns_ctx->current_queue_depth--;
187 
188 	if (spdk_nvme_cpl_is_error(completion)) {
189 		ns_ctx->io_completed_error++;
190 	} else {
191 		ns_ctx->io_completed++;
192 	}
193 
194 	spdk_free(task->buf);
195 	spdk_mempool_put(task_pool, task);
196 
197 	/*
198 	 * is_draining indicates when time has expired for the test run
199 	 * and we are just waiting for the previously submitted I/O
200 	 * to complete.  In this case, do not submit a new I/O to replace
201 	 * the one just completed.
202 	 */
203 	if (!ns_ctx->is_draining) {
204 		submit_single_io(ns_ctx);
205 	}
206 }
207 
/* SPDK completion callback; ctx is the reset_task passed at submission. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct reset_task *task = ctx;

	task_complete(task, completion);
}
213 
/*
 * Poll the context's qpair once, reaping any completed I/O (each
 * completion invokes io_complete()).  The return value of
 * spdk_nvme_qpair_process_completions() is ignored here — NOTE(review):
 * a negative return (failed/disconnected qpair) therefore goes
 * unnoticed; confirm this is acceptable for a reset test.
 */
static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
}
219 
/* Prime the namespace context with queue_depth outstanding I/O. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(ns_ctx);
	}
}
227 
228 static void
229 drain_io(struct ns_worker_ctx *ns_ctx)
230 {
231 	ns_ctx->is_draining = true;
232 	while (ns_ctx->current_queue_depth > 0) {
233 		check_io(ns_ctx);
234 	}
235 }
236 
/*
 * Body of the single worker thread: allocate one I/O qpair per namespace
 * context, keep g_queue_depth I/O outstanding on each, perform one
 * controller reset per namespace during the run, and drain everything
 * when the timed run ends.  Returns 0 on success, -1 if qpair allocation
 * or a reset fails.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Submit initial I/O for each namespace. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
	}

	while (1) {
		/*
		 * NOTE(review): "remaining seconds > total/2" is already true
		 * on the first pass (remaining ~= g_time_in_sec), so the reset
		 * fires immediately after the initial submissions rather than
		 * at the halfway point the expression suggests.  If a mid-run
		 * reset was intended, the comparison would be '<' — confirm
		 * intent before changing.
		 */
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
			}
			did_reset = true;
		}

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			check_io(ns_ctx);
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	/* Time is up: reap everything still in flight and free the qpairs. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
	}

	return 0;
}
289 
/* Print command-line help for program_name to stdout. */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n"
	       "\t[-q io depth]\n"
	       "\t[-o io size in bytes]\n"
	       "\t[-w io pattern type, must be one of\n"
	       "\t\t(read, write, randread, randwrite, rw, randrw)]\n"
	       "\t[-M rwmixread (100 for reads, 0 for writes)]\n"
	       "\t[-t time in seconds(should be larger than 15 seconds)]\n"
	       "\t\t(default:0 - unlimited)\n");
}
303 
304 static int
305 print_stats(void)
306 {
307 	uint64_t io_completed, io_submitted, io_completed_error;
308 	uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
309 	struct worker_thread	*worker;
310 	struct ns_worker_ctx	*ns_ctx;
311 
312 	total_completed_io = 0;
313 	total_submitted_io = 0;
314 	total_completed_err_io = 0;
315 
316 	worker = g_worker;
317 	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
318 		io_completed = ns_ctx->io_completed;
319 		io_submitted = ns_ctx->io_submitted;
320 		io_completed_error = ns_ctx->io_completed_error;
321 		total_completed_io += io_completed;
322 		total_submitted_io += io_submitted;
323 		total_completed_err_io += io_completed_error;
324 	}
325 
326 	printf("========================================================\n");
327 	printf("%16" PRIu64 " IO completed successfully\n", total_completed_io);
328 	printf("%16" PRIu64 " IO completed with error\n", total_completed_err_io);
329 	printf("--------------------------------------------------------\n");
330 	printf("%16" PRIu64 " IO completed total\n", total_completed_io + total_completed_err_io);
331 	printf("%16" PRIu64 " IO submitted\n", total_submitted_io);
332 
333 	if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
334 		fprintf(stderr, "Some IO are missing......\n");
335 		return -1;
336 	}
337 
338 	return 0;
339 }
340 
341 static int
342 parse_args(int argc, char **argv)
343 {
344 	const char *workload_type;
345 	int op;
346 	bool mix_specified = false;
347 	long int val;
348 
349 	/* default value */
350 	g_queue_depth = 0;
351 	g_io_size_bytes = 0;
352 	workload_type = NULL;
353 	g_time_in_sec = 0;
354 	g_rw_percentage = -1;
355 
356 	while ((op = getopt(argc, argv, "o:q:t:w:M:")) != -1) {
357 		if (op == 'w') {
358 			workload_type = optarg;
359 		} else if (op == '?') {
360 			usage(argv[0]);
361 			return -EINVAL;
362 		} else {
363 			val = spdk_strtol(optarg, 10);
364 			if (val < 0) {
365 				fprintf(stderr, "Converting a string to integer failed\n");
366 				return val;
367 			}
368 			switch (op) {
369 			case 'q':
370 				g_queue_depth = val;
371 				break;
372 			case 'o':
373 				g_io_size_bytes = val;
374 				break;
375 			case 't':
376 				g_time_in_sec = val;
377 				break;
378 			case 'M':
379 				g_rw_percentage = val;
380 				mix_specified = true;
381 				break;
382 			default:
383 				usage(argv[0]);
384 				return -EINVAL;
385 			}
386 		}
387 	}
388 
389 	if (!g_queue_depth) {
390 		usage(argv[0]);
391 		return 1;
392 	}
393 	if (!g_io_size_bytes) {
394 		usage(argv[0]);
395 		return 1;
396 	}
397 	if (!workload_type) {
398 		usage(argv[0]);
399 		return 1;
400 	}
401 	if (!g_time_in_sec) {
402 		usage(argv[0]);
403 		return 1;
404 	}
405 
406 	if (strcmp(workload_type, "read") &&
407 	    strcmp(workload_type, "write") &&
408 	    strcmp(workload_type, "randread") &&
409 	    strcmp(workload_type, "randwrite") &&
410 	    strcmp(workload_type, "rw") &&
411 	    strcmp(workload_type, "randrw")) {
412 		fprintf(stderr,
413 			"io pattern type must be one of\n"
414 			"(read, write, randread, randwrite, rw, randrw)\n");
415 		return 1;
416 	}
417 
418 	if (!strcmp(workload_type, "read") ||
419 	    !strcmp(workload_type, "randread")) {
420 		g_rw_percentage = 100;
421 	}
422 
423 	if (!strcmp(workload_type, "write") ||
424 	    !strcmp(workload_type, "randwrite")) {
425 		g_rw_percentage = 0;
426 	}
427 
428 	if (!strcmp(workload_type, "read") ||
429 	    !strcmp(workload_type, "randread") ||
430 	    !strcmp(workload_type, "write") ||
431 	    !strcmp(workload_type, "randwrite")) {
432 		if (mix_specified) {
433 			fprintf(stderr, "Ignoring -M option... Please use -M option"
434 				" only when using rw or randrw.\n");
435 		}
436 	}
437 
438 	if (!strcmp(workload_type, "rw") ||
439 	    !strcmp(workload_type, "randrw")) {
440 		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
441 			fprintf(stderr,
442 				"-M must be specified to value from 0 to 100 "
443 				"for rw or randrw.\n");
444 			return 1;
445 		}
446 	}
447 
448 	if (!strcmp(workload_type, "read") ||
449 	    !strcmp(workload_type, "write") ||
450 	    !strcmp(workload_type, "rw")) {
451 		g_is_random = 0;
452 	} else {
453 		g_is_random = 1;
454 	}
455 
456 	return 0;
457 }
458 
459 static int
460 register_worker(void)
461 {
462 	struct worker_thread *worker;
463 
464 	worker = malloc(sizeof(struct worker_thread));
465 	if (worker == NULL) {
466 		perror("worker_thread malloc");
467 		return -1;
468 	}
469 
470 	memset(worker, 0, sizeof(struct worker_thread));
471 	TAILQ_INIT(&worker->ns_ctx);
472 	worker->lcore = spdk_env_get_current_core();
473 
474 	g_worker = worker;
475 
476 	return 0;
477 }
478 
479 
/*
 * Probe callback: returning true tells SPDK to attach the controller.
 * Error logging is disabled — presumably because the reset cycles are
 * expected to generate transient I/O errors; confirm if noise reappears.
 */
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	opts->disable_error_logging = true;
	return true;
}
487 
488 static void
489 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
490 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
491 {
492 	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
493 		struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);
494 
495 		/* QEMU emulated SSDs can't handle this test, so we will skip them.  QEMU NVMe SSDs
496 		 * report themselves as VID == (Intel|Red Hat).  So we need to check this specific
497 		 * (0x5845|0x0010) device ID to know whether it's QEMU or not.
498 		 */
499 		if ((spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL &&
500 		     spdk_pci_device_get_device_id(dev) == 0x5845) ||
501 		    (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_REDHAT &&
502 		     spdk_pci_device_get_device_id(dev) == 0x0010)) {
503 			g_qemu_ssd_found = true;
504 			printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr);
505 			return;
506 		}
507 	}
508 
509 	register_ctrlr(ctrlr);
510 }
511 
512 static int
513 register_controllers(void)
514 {
515 	printf("Initializing NVMe Controllers\n");
516 
517 	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
518 		fprintf(stderr, "spdk_nvme_probe() failed\n");
519 		return 1;
520 	}
521 
522 	return 0;
523 }
524 
525 static void
526 unregister_controllers(void)
527 {
528 	struct ctrlr_entry *entry, *tmp;
529 	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
530 
531 	TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) {
532 		TAILQ_REMOVE(&g_controllers, entry, link);
533 		spdk_nvme_detach_async(entry->ctrlr, &detach_ctx);
534 		free(entry);
535 	}
536 
537 	if (detach_ctx) {
538 		spdk_nvme_detach_poll(detach_ctx);
539 	}
540 }
541 
542 static int
543 associate_workers_with_ns(void)
544 {
545 	struct ns_entry		*entry = TAILQ_FIRST(&g_namespaces);
546 	struct worker_thread	*worker = g_worker;
547 	struct ns_worker_ctx	*ns_ctx;
548 	int			i, count;
549 
550 	count = g_num_namespaces;
551 
552 	for (i = 0; i < count; i++) {
553 		if (entry == NULL) {
554 			break;
555 		}
556 		ns_ctx = malloc(sizeof(struct ns_worker_ctx));
557 		if (!ns_ctx) {
558 			return -1;
559 		}
560 		memset(ns_ctx, 0, sizeof(*ns_ctx));
561 
562 		printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
563 		ns_ctx->entry = entry;
564 		TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link);
565 
566 		entry = TAILQ_NEXT(entry, link);;
567 		if (entry == NULL) {
568 			entry = TAILQ_FIRST(&g_namespaces);
569 		}
570 	}
571 
572 	return 0;
573 }
574 
575 static void
576 unregister_worker(void)
577 {
578 	struct ns_worker_ctx	*ns_ctx, *tmp;
579 
580 	assert(g_worker != NULL);
581 
582 	TAILQ_FOREACH_SAFE(ns_ctx, &g_worker->ns_ctx, link, tmp) {
583 		TAILQ_REMOVE(&g_worker->ns_ctx, ns_ctx, link);
584 		free(ns_ctx);
585 	}
586 
587 	free(g_worker);
588 	g_worker = NULL;
589 }
590 
591 static int
592 run_nvme_reset_cycle(void)
593 {
594 	struct worker_thread *worker = g_worker;
595 	struct ns_worker_ctx *ns_ctx;
596 
597 	if (work_fn(worker) != 0) {
598 		return -1;
599 	}
600 
601 	if (print_stats() != 0) {
602 		return -1;
603 	}
604 
605 	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
606 		ns_ctx->io_completed = 0;
607 		ns_ctx->io_completed_error = 0;
608 		ns_ctx->io_submitted = 0;
609 		ns_ctx->is_draining = false;
610 	}
611 
612 	return 0;
613 }
614 
615 static void
616 free_tasks(void)
617 {
618 	if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) {
619 		fprintf(stderr, "task_pool count is %zu but should be %d\n",
620 			spdk_mempool_count(task_pool), TASK_POOL_NUM);
621 	}
622 	spdk_mempool_free(task_pool);
623 }
624 
625 int
626 main(int argc, char **argv)
627 {
628 	int			rc;
629 	int			i;
630 	struct spdk_env_opts	opts;
631 
632 
633 	rc = parse_args(argc, argv);
634 	if (rc != 0) {
635 		return rc;
636 	}
637 
638 	opts.opts_size = sizeof(opts);
639 	spdk_env_opts_init(&opts);
640 	opts.name = "reset";
641 	opts.core_mask = "0x1";
642 	opts.shm_id = 0;
643 	if (spdk_env_init(&opts) < 0) {
644 		fprintf(stderr, "Unable to initialize SPDK env\n");
645 		return 1;
646 	}
647 
648 	if (register_controllers() != 0) {
649 		return 1;
650 	}
651 
652 	if (TAILQ_EMPTY(&g_controllers)) {
653 		printf("No NVMe controller found, %s exiting\n", argv[0]);
654 		return g_qemu_ssd_found ? 0 : 1;
655 	}
656 
657 	task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM,
658 					sizeof(struct reset_task),
659 					64, SPDK_ENV_NUMA_ID_ANY);
660 	if (!task_pool) {
661 		fprintf(stderr, "Cannot create task pool\n");
662 		return 1;
663 	}
664 
665 	g_tsc_rate = spdk_get_ticks_hz();
666 
667 	if (register_worker() != 0) {
668 		return 1;
669 	}
670 
671 	if (associate_workers_with_ns() != 0) {
672 		rc = 1;
673 		goto cleanup;
674 	}
675 
676 	printf("Initialization complete. Launching workers.\n");
677 
678 	for (i = 2; i >= 0; i--) {
679 		rc = run_nvme_reset_cycle();
680 		if (rc != 0) {
681 			goto cleanup;
682 		}
683 	}
684 
685 cleanup:
686 	unregister_controllers();
687 	unregister_worker();
688 	free_tasks();
689 
690 	if (rc != 0) {
691 		fprintf(stderr, "%s: errors occurred\n", argv[0]);
692 	}
693 
694 	return rc;
695 }
696