xref: /spdk/test/nvme/aer/aer.c (revision 32999ab917f67af61872f868585fd3d78ad6fb8a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "spdk/log.h"
37 #include "spdk/nvme.h"
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40 
41 #define MAX_DEVS 64
42 
43 struct dev {
44 	struct spdk_nvme_ctrlr				*ctrlr;
45 	struct spdk_nvme_health_information_page	*health_page;
46 	struct spdk_nvme_ns_list			*changed_ns_list;
47 	uint32_t					orig_temp_threshold;
48 	char						name[SPDK_NVMF_TRADDR_MAX_LEN + 1];
49 };
50 
51 static void get_feature_test(struct dev *dev);
52 
53 static struct dev g_devs[MAX_DEVS];
54 static int g_num_devs = 0;
55 
56 #define foreach_dev(iter) \
57 	for (iter = g_devs; iter - g_devs < g_num_devs; iter++)
58 
59 static int g_outstanding_commands = 0;
60 static int g_aer_done = 0;
61 static int g_temperature_done = 0;
62 static int g_failed = 0;
63 static struct spdk_nvme_transport_id g_trid;
64 static char *g_touch_file;
65 
66 /* Enable AER temperature test */
67 static int g_enable_temp_test = 0;
68 /* Enable AER namespace attribute notice test, this variable holds
69  * the NSID that is expected to be in the Changed NS List.
70  */
71 static uint32_t g_expected_ns_test = 0;
72 
73 static void
74 set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
75 {
76 	struct dev *dev = cb_arg;
77 
78 	g_outstanding_commands--;
79 
80 	if (spdk_nvme_cpl_is_error(cpl)) {
81 		printf("%s: set feature (temp threshold) failed\n", dev->name);
82 		g_failed = 1;
83 		return;
84 	}
85 
86 	/* Admin command completions are synchronized by the NVMe driver,
87 	 * so we don't need to do any special locking here. */
88 	g_temperature_done++;
89 }
90 
91 static int
92 set_temp_threshold(struct dev *dev, uint32_t temp)
93 {
94 	struct spdk_nvme_cmd cmd = {};
95 	int rc;
96 
97 	cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
98 	cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
99 	cmd.cdw11_bits.feat_temp_threshold.bits.tmpth = temp;
100 
101 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev);
102 	if (rc == 0) {
103 		g_outstanding_commands++;
104 	}
105 
106 	return rc;
107 }
108 
109 static void
110 get_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
111 {
112 	struct dev *dev = cb_arg;
113 
114 	g_outstanding_commands--;
115 
116 	if (spdk_nvme_cpl_is_error(cpl)) {
117 		printf("%s: get feature (temp threshold) failed\n", dev->name);
118 		g_failed = 1;
119 		return;
120 	}
121 
122 	dev->orig_temp_threshold = cpl->cdw0;
123 	printf("%s: original temperature threshold: %u Kelvin (%d Celsius)\n",
124 	       dev->name, dev->orig_temp_threshold, dev->orig_temp_threshold - 273);
125 
126 	g_temperature_done++;
127 }
128 
129 static int
130 get_temp_threshold(struct dev *dev)
131 {
132 	struct spdk_nvme_cmd cmd = {};
133 	int rc;
134 
135 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
136 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
137 
138 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, get_temp_completion, dev);
139 	if (rc == 0) {
140 		g_outstanding_commands++;
141 	}
142 
143 	return rc;
144 }
145 
146 static void
147 print_health_page(struct dev *dev, struct spdk_nvme_health_information_page *hip)
148 {
149 	printf("%s: Current Temperature:         %u Kelvin (%d Celsius)\n",
150 	       dev->name, hip->temperature, hip->temperature - 273);
151 }
152 
153 static void
154 get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
155 {
156 	struct dev *dev = cb_arg;
157 
158 	g_outstanding_commands --;
159 
160 	if (spdk_nvme_cpl_is_error(cpl)) {
161 		printf("%s: get log page failed\n", dev->name);
162 		g_failed = 1;
163 		return;
164 	}
165 
166 	print_health_page(dev, dev->health_page);
167 	g_aer_done++;
168 }
169 
170 static void
171 get_changed_ns_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
172 {
173 	struct dev *dev = cb_arg;
174 	bool found = false;
175 	uint32_t i;
176 
177 	g_outstanding_commands --;
178 
179 	if (spdk_nvme_cpl_is_error(cpl)) {
180 		printf("%s: get log page failed\n", dev->name);
181 		g_failed = 1;
182 		return;
183 	}
184 
185 	/* Let's compare the expected namespce ID is
186 	 * in changed namespace list
187 	 */
188 	if (dev->changed_ns_list->ns_list[0] != 0xffffffffu) {
189 		for (i = 0; i < sizeof(*dev->changed_ns_list) / sizeof(uint32_t); i++) {
190 			if (g_expected_ns_test == dev->changed_ns_list->ns_list[i]) {
191 				printf("%s: changed NS list contains expected NSID: %u\n",
192 				       dev->name, g_expected_ns_test);
193 				found = true;
194 				break;
195 			}
196 		}
197 	}
198 
199 	if (!found) {
200 		printf("%s: Error: Can't find expected NSID %u\n", dev->name, g_expected_ns_test);
201 		g_failed = 1;
202 	}
203 
204 	g_aer_done++;
205 }
206 
207 static int
208 get_health_log_page(struct dev *dev)
209 {
210 	int rc;
211 
212 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,
213 					      SPDK_NVME_GLOBAL_NS_TAG, dev->health_page, sizeof(*dev->health_page), 0,
214 					      get_health_log_page_completion, dev);
215 
216 	if (rc == 0) {
217 		g_outstanding_commands++;
218 	}
219 
220 	return rc;
221 }
222 
223 static int
224 get_changed_ns_log_page(struct dev *dev)
225 {
226 	int rc;
227 
228 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_CHANGED_NS_LIST,
229 					      SPDK_NVME_GLOBAL_NS_TAG, dev->changed_ns_list,
230 					      sizeof(*dev->changed_ns_list), 0,
231 					      get_changed_ns_log_page_completion, dev);
232 
233 	if (rc == 0) {
234 		g_outstanding_commands++;
235 	}
236 
237 	return rc;
238 }
239 
240 static void
241 cleanup(void)
242 {
243 	struct dev *dev;
244 
245 	foreach_dev(dev) {
246 		if (dev->health_page) {
247 			spdk_free(dev->health_page);
248 		}
249 		if (dev->changed_ns_list) {
250 			spdk_free(dev->changed_ns_list);
251 		}
252 	}
253 }
254 
255 static void
256 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
257 {
258 	uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
259 	struct dev *dev = arg;
260 
261 	if (spdk_nvme_cpl_is_error(cpl)) {
262 		printf("%s: AER failed\n", dev->name);
263 		g_failed = 1;
264 		return;
265 	}
266 
267 	printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);
268 
269 	if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) {
270 		/* Set the temperature threshold back to the original value
271 		 * so the AER doesn't trigger again.
272 		 */
273 		set_temp_threshold(dev, dev->orig_temp_threshold);
274 		get_health_log_page(dev);
275 	} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
276 		get_changed_ns_log_page(dev);
277 	}
278 }
279 
280 static void
281 usage(const char *program_name)
282 {
283 	printf("%s [options]", program_name);
284 	printf("\n");
285 	printf("options:\n");
286 	printf(" -T         enable temperature tests\n");
287 	printf(" -n         expected Namespace attribute notice ID\n");
288 	printf(" -t <file>  touch specified file when ready to receive AER\n");
289 	printf(" -r trid    remote NVMe over Fabrics target address\n");
290 	printf("    Format: 'key:value [key:value] ...'\n");
291 	printf("    Keys:\n");
292 	printf("     trtype      Transport type (e.g. RDMA)\n");
293 	printf("     adrfam      Address family (e.g. IPv4, IPv6)\n");
294 	printf("     traddr      Transport address (e.g. 192.168.100.8)\n");
295 	printf("     trsvcid     Transport service identifier (e.g. 4420)\n");
296 	printf("     subnqn      Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
297 	printf("    Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n");
298 
299 	spdk_log_usage(stdout, "-L");
300 
301 	printf(" -v         verbose (enable warnings)\n");
302 	printf(" -H         show this usage\n");
303 }
304 
305 static int
306 parse_args(int argc, char **argv)
307 {
308 	int op, rc;
309 	long int val;
310 
311 	spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
312 	snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
313 
314 	while ((op = getopt(argc, argv, "n:r:t:HL:T")) != -1) {
315 		switch (op) {
316 		case 'n':
317 			val = spdk_strtol(optarg, 10);
318 			if (val < 0) {
319 				fprintf(stderr, "Invalid NS attribute notice ID\n");
320 				return val;
321 			}
322 			g_expected_ns_test = (uint32_t)val;
323 			break;
324 		case 'r':
325 			if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) {
326 				fprintf(stderr, "Error parsing transport address\n");
327 				return 1;
328 			}
329 			break;
330 		case 't':
331 			g_touch_file = optarg;
332 			break;
333 		case 'L':
334 			rc = spdk_log_set_flag(optarg);
335 			if (rc < 0) {
336 				fprintf(stderr, "unknown flag\n");
337 				usage(argv[0]);
338 				exit(EXIT_FAILURE);
339 			}
340 #ifdef DEBUG
341 			spdk_log_set_print_level(SPDK_LOG_DEBUG);
342 #endif
343 			break;
344 		case 'T':
345 			g_enable_temp_test = 1;
346 			break;
347 		case 'H':
348 			usage(argv[0]);
349 			exit(EXIT_SUCCESS);
350 		default:
351 			usage(argv[0]);
352 			return 1;
353 		}
354 	}
355 
356 	return 0;
357 }
358 
359 static bool
360 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
361 	 struct spdk_nvme_ctrlr_opts *opts)
362 {
363 	printf("Attaching to %s\n", trid->traddr);
364 
365 	return true;
366 }
367 
368 static void
369 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
370 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
371 {
372 	struct dev *dev;
373 
374 	/* add to dev list */
375 	dev = &g_devs[g_num_devs++];
376 
377 	dev->ctrlr = ctrlr;
378 
379 	snprintf(dev->name, sizeof(dev->name), "%s",
380 		 trid->traddr);
381 
382 	printf("Attached to %s\n", dev->name);
383 
384 	dev->health_page = spdk_zmalloc(sizeof(*dev->health_page), 4096, NULL, SPDK_ENV_LCORE_ID_ANY,
385 					SPDK_MALLOC_DMA);
386 	if (dev->health_page == NULL) {
387 		printf("Allocation error (health page)\n");
388 		g_failed = 1;
389 	}
390 	dev->changed_ns_list = spdk_zmalloc(sizeof(*dev->changed_ns_list), 4096, NULL,
391 					    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
392 	if (dev->changed_ns_list == NULL) {
393 		printf("Allocation error (changed namespace list page)\n");
394 		g_failed = 1;
395 	}
396 }
397 
398 static void
399 get_feature_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl)
400 {
401 	struct dev *dev = cb_arg;
402 
403 	g_outstanding_commands--;
404 
405 	if (spdk_nvme_cpl_is_error(cpl)) {
406 		printf("%s: get number of queues failed\n", dev->name);
407 		g_failed = 1;
408 		return;
409 	}
410 
411 	if (g_aer_done < g_num_devs) {
412 		/*
413 		 * Resubmit Get Features command to continue filling admin queue
414 		 * while the test is running.
415 		 */
416 		get_feature_test(dev);
417 	}
418 }
419 
420 static void
421 get_feature_test(struct dev *dev)
422 {
423 	struct spdk_nvme_cmd cmd;
424 
425 	memset(&cmd, 0, sizeof(cmd));
426 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
427 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_NUMBER_OF_QUEUES;
428 	if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0,
429 					  get_feature_test_cb, dev) != 0) {
430 		printf("Failed to send Get Features command for dev=%p\n", dev);
431 		g_failed = 1;
432 		return;
433 	}
434 
435 	g_outstanding_commands++;
436 }
437 
438 static int
439 spdk_aer_temperature_test(void)
440 {
441 	struct dev *dev;
442 
443 	printf("Getting temperature thresholds of all controllers...\n");
444 	foreach_dev(dev) {
445 		/* Get the original temperature threshold */
446 		get_temp_threshold(dev);
447 	}
448 
449 	while (!g_failed && g_temperature_done < g_num_devs) {
450 		foreach_dev(dev) {
451 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
452 		}
453 	}
454 
455 	if (g_failed) {
456 		return g_failed;
457 	}
458 	g_temperature_done = 0;
459 	g_aer_done = 0;
460 
461 	/* Send admin commands to test admin queue wraparound while waiting for the AER */
462 	foreach_dev(dev) {
463 		get_feature_test(dev);
464 	}
465 
466 	if (g_failed) {
467 		return g_failed;
468 	}
469 
470 	printf("Waiting for all controllers to trigger AER...\n");
471 	foreach_dev(dev) {
472 		/* Set the temperature threshold to a low value */
473 		set_temp_threshold(dev, 200);
474 	}
475 
476 	if (g_failed) {
477 		return g_failed;
478 	}
479 
480 	while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
481 		foreach_dev(dev) {
482 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
483 		}
484 	}
485 
486 	if (g_failed) {
487 		return g_failed;
488 	}
489 
490 	return 0;
491 }
492 
493 static int
494 spdk_aer_changed_ns_test(void)
495 {
496 	struct dev *dev;
497 
498 	g_aer_done = 0;
499 
500 	printf("Starting namespce attribute notice tests for all controllers...\n");
501 
502 	foreach_dev(dev) {
503 		get_feature_test(dev);
504 	}
505 
506 	if (g_failed) {
507 		return g_failed;
508 	}
509 
510 	while (!g_failed && (g_aer_done < g_num_devs)) {
511 		foreach_dev(dev) {
512 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
513 		}
514 	}
515 
516 	if (g_failed) {
517 		return g_failed;
518 	}
519 
520 	return 0;
521 }
522 
523 int main(int argc, char **argv)
524 {
525 	struct dev		*dev;
526 	struct spdk_env_opts	opts;
527 	int			rc;
528 	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
529 
530 	rc = parse_args(argc, argv);
531 	if (rc != 0) {
532 		return rc;
533 	}
534 
535 	spdk_env_opts_init(&opts);
536 	opts.name = "aer";
537 	opts.core_mask = "0x1";
538 	if (spdk_env_init(&opts) < 0) {
539 		fprintf(stderr, "Unable to initialize SPDK env\n");
540 		return 1;
541 	}
542 
543 	printf("Asynchronous Event Request test\n");
544 
545 	if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
546 		fprintf(stderr, "spdk_nvme_probe() failed\n");
547 		return 1;
548 	}
549 
550 	if (g_failed) {
551 		goto done;
552 	}
553 
554 	printf("Registering asynchronous event callbacks...\n");
555 	foreach_dev(dev) {
556 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
557 	}
558 
559 	if (g_touch_file) {
560 		int fd;
561 
562 		fd = open(g_touch_file, O_CREAT | O_EXCL | O_RDWR, S_IFREG);
563 		if (fd == -1) {
564 			fprintf(stderr, "Could not touch %s (%s).\n", g_touch_file, strerror(errno));
565 			g_failed = true;
566 			goto done;
567 		}
568 		close(fd);
569 	}
570 
571 	/* AER temperature test */
572 	if (g_enable_temp_test) {
573 		if (spdk_aer_temperature_test()) {
574 			goto done;
575 		}
576 	}
577 
578 	/* AER changed namespace list test */
579 	if (g_expected_ns_test) {
580 		if (spdk_aer_changed_ns_test()) {
581 			goto done;
582 		}
583 	}
584 
585 	printf("Cleaning up...\n");
586 
587 	while (g_outstanding_commands) {
588 		foreach_dev(dev) {
589 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
590 		}
591 	}
592 
593 	/* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. */
594 	foreach_dev(dev) {
595 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL);
596 	}
597 
598 	foreach_dev(dev) {
599 		spdk_nvme_detach_async(dev->ctrlr, &detach_ctx);
600 	}
601 
602 	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
603 		;
604 	}
605 
606 done:
607 	cleanup();
608 
609 	return g_failed;
610 }
611