xref: /spdk/test/nvme/aer/aer.c (revision d73077b84a71985da1db1c9847ea7c042189bae2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "spdk/log.h"
37 #include "spdk/nvme.h"
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40 
/* Capacity of the g_devs table of attached controllers. */
#define MAX_DEVS 64

/* Per-controller state tracked by this test; one entry is filled in by attach_cb(). */
struct dev {
	/* Controller handle handed to attach_cb(). */
	struct spdk_nvme_ctrlr				*ctrlr;
	/* Buffer that receives the health information log page (allocated in attach_cb()). */
	struct spdk_nvme_health_information_page	*health_page;
	/* Buffer that receives the changed namespace list log page (allocated in attach_cb()). */
	struct spdk_nvme_ns_list			*changed_ns_list;
	/* Dword 0 of the Get Features (temperature threshold) completion; restored in aer_cb(). */
	uint32_t					orig_temp_threshold;
	/* Copy of the transport address (trid->traddr), used as a prefix in log messages. */
	char						name[SPDK_NVMF_TRADDR_MAX_LEN + 1];
};
50 
/* Forward declaration: get_feature_test_cb() resubmits through get_feature_test(). */
static void get_feature_test(struct dev *dev);

/* Table of attached controllers; the first g_num_devs entries are in use. */
static struct dev g_devs[MAX_DEVS];
static int g_num_devs = 0;

/* Iterate `iter` (a struct dev *) over the in-use entries of g_devs. */
#define foreach_dev(iter) \
	for (iter = g_devs; iter - g_devs < g_num_devs; iter++)

/* Admin commands submitted by this file whose completion callback has not run yet. */
static int g_outstanding_commands = 0;
/* Number of post-AER log page reads that completed without an NVMe error. */
static int g_aer_done = 0;
/* Number of temperature threshold Get/Set Features commands that completed successfully. */
static int g_temperature_done = 0;
/* Set to non-zero as soon as any step fails; also used as the process exit status. */
static int g_failed = 0;
/* Transport ID passed to spdk_nvme_probe(); filled in by parse_args(). */
static struct spdk_nvme_transport_id g_trid;
/* Path given with -t; created once the AER callbacks are registered. NULL if not given. */
static char *g_touch_file;

/* Enable AER temperature test */
static int g_enable_temp_test = 0;
/* Enable AER namespace attribute notice test, this variable holds
 * the NSID that is expected to be in the Changed NS List.
 */
static uint32_t g_expected_ns_test = 0;
72 
73 static void
74 set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
75 {
76 	struct dev *dev = cb_arg;
77 
78 	g_outstanding_commands--;
79 
80 	if (spdk_nvme_cpl_is_error(cpl)) {
81 		printf("%s: set feature (temp threshold) failed\n", dev->name);
82 		g_failed = 1;
83 		return;
84 	}
85 
86 	/* Admin command completions are synchronized by the NVMe driver,
87 	 * so we don't need to do any special locking here. */
88 	g_temperature_done++;
89 }
90 
91 static int
92 set_temp_threshold(struct dev *dev, uint32_t temp)
93 {
94 	struct spdk_nvme_cmd cmd = {};
95 	int rc;
96 
97 	cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
98 	cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
99 	cmd.cdw11_bits.feat_temp_threshold.bits.tmpth = temp;
100 
101 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev);
102 	if (rc == 0) {
103 		g_outstanding_commands++;
104 	}
105 
106 	return rc;
107 }
108 
109 static void
110 get_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
111 {
112 	struct dev *dev = cb_arg;
113 
114 	g_outstanding_commands--;
115 
116 	if (spdk_nvme_cpl_is_error(cpl)) {
117 		printf("%s: get feature (temp threshold) failed\n", dev->name);
118 		g_failed = 1;
119 		return;
120 	}
121 
122 	dev->orig_temp_threshold = cpl->cdw0;
123 	printf("%s: original temperature threshold: %u Kelvin (%d Celsius)\n",
124 	       dev->name, dev->orig_temp_threshold, dev->orig_temp_threshold - 273);
125 
126 	g_temperature_done++;
127 }
128 
129 static int
130 get_temp_threshold(struct dev *dev)
131 {
132 	struct spdk_nvme_cmd cmd = {};
133 	int rc;
134 
135 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
136 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
137 
138 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, get_temp_completion, dev);
139 	if (rc == 0) {
140 		g_outstanding_commands++;
141 	}
142 
143 	return rc;
144 }
145 
146 static void
147 print_health_page(struct dev *dev, struct spdk_nvme_health_information_page *hip)
148 {
149 	printf("%s: Current Temperature:         %u Kelvin (%d Celsius)\n",
150 	       dev->name, hip->temperature, hip->temperature - 273);
151 }
152 
153 static void
154 get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
155 {
156 	struct dev *dev = cb_arg;
157 
158 	g_outstanding_commands --;
159 
160 	if (spdk_nvme_cpl_is_error(cpl)) {
161 		printf("%s: get log page failed\n", dev->name);
162 		g_failed = 1;
163 		return;
164 	}
165 
166 	print_health_page(dev, dev->health_page);
167 	g_aer_done++;
168 }
169 
170 static void
171 get_changed_ns_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
172 {
173 	struct dev *dev = cb_arg;
174 	bool found = false;
175 	uint32_t i;
176 
177 	g_outstanding_commands --;
178 
179 	if (spdk_nvme_cpl_is_error(cpl)) {
180 		printf("%s: get log page failed\n", dev->name);
181 		g_failed = 1;
182 		return;
183 	}
184 
185 	/* Let's compare the expected namespce ID is
186 	 * in changed namespace list
187 	 */
188 	if (dev->changed_ns_list->ns_list[0] != 0xffffffffu) {
189 		for (i = 0; i < sizeof(*dev->changed_ns_list) / sizeof(uint32_t); i++) {
190 			if (g_expected_ns_test == dev->changed_ns_list->ns_list[i]) {
191 				printf("%s: changed NS list contains expected NSID: %u\n",
192 				       dev->name, g_expected_ns_test);
193 				found = true;
194 				break;
195 			}
196 		}
197 	}
198 
199 	if (!found) {
200 		printf("%s: Error: Can't find expected NSID %u\n", dev->name, g_expected_ns_test);
201 		g_failed = 1;
202 	}
203 
204 	g_aer_done++;
205 }
206 
207 static int
208 get_health_log_page(struct dev *dev)
209 {
210 	int rc;
211 
212 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,
213 					      SPDK_NVME_GLOBAL_NS_TAG, dev->health_page, sizeof(*dev->health_page), 0,
214 					      get_health_log_page_completion, dev);
215 
216 	if (rc == 0) {
217 		g_outstanding_commands++;
218 	}
219 
220 	return rc;
221 }
222 
223 static int
224 get_changed_ns_log_page(struct dev *dev)
225 {
226 	int rc;
227 
228 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_CHANGED_NS_LIST,
229 					      SPDK_NVME_GLOBAL_NS_TAG, dev->changed_ns_list,
230 					      sizeof(*dev->changed_ns_list), 0,
231 					      get_changed_ns_log_page_completion, dev);
232 
233 	if (rc == 0) {
234 		g_outstanding_commands++;
235 	}
236 
237 	return rc;
238 }
239 
240 static void
241 cleanup(void)
242 {
243 	struct dev *dev;
244 
245 	foreach_dev(dev) {
246 		if (dev->health_page) {
247 			spdk_free(dev->health_page);
248 		}
249 		if (dev->changed_ns_list) {
250 			spdk_free(dev->changed_ns_list);
251 		}
252 	}
253 }
254 
255 static void
256 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
257 {
258 	uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
259 	struct dev *dev = arg;
260 
261 	if (spdk_nvme_cpl_is_error(cpl)) {
262 		printf("%s: AER failed\n", dev->name);
263 		g_failed = 1;
264 		return;
265 	}
266 
267 	printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);
268 
269 	if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) {
270 		/* Set the temperature threshold back to the original value
271 		 * so the AER doesn't trigger again.
272 		 */
273 		set_temp_threshold(dev, dev->orig_temp_threshold);
274 		get_health_log_page(dev);
275 	} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
276 		get_changed_ns_log_page(dev);
277 	}
278 }
279 
280 static void
281 usage(const char *program_name)
282 {
283 	printf("%s [options]", program_name);
284 	printf("\n");
285 	printf("options:\n");
286 	printf(" -T         enable temperature tests\n");
287 	printf(" -n         expected Namespace attribute notice ID\n");
288 	printf(" -t <file>  touch specified file when ready to receive AER\n");
289 	printf(" -r trid    remote NVMe over Fabrics target address\n");
290 	printf("    Format: 'key:value [key:value] ...'\n");
291 	printf("    Keys:\n");
292 	printf("     trtype      Transport type (e.g. RDMA)\n");
293 	printf("     adrfam      Address family (e.g. IPv4, IPv6)\n");
294 	printf("     traddr      Transport address (e.g. 192.168.100.8)\n");
295 	printf("     trsvcid     Transport service identifier (e.g. 4420)\n");
296 	printf("     subnqn      Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
297 	printf("    Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n");
298 
299 	spdk_log_usage(stdout, "-L");
300 
301 	printf(" -v         verbose (enable warnings)\n");
302 	printf(" -H         show this usage\n");
303 }
304 
305 static int
306 parse_args(int argc, char **argv)
307 {
308 	int op, rc;
309 	long int val;
310 
311 	spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
312 	snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
313 
314 	while ((op = getopt(argc, argv, "n:r:t:HL:T")) != -1) {
315 		switch (op) {
316 		case 'n':
317 			val = spdk_strtol(optarg, 10);
318 			if (val < 0) {
319 				fprintf(stderr, "Invalid NS attribute notice ID\n");
320 				return val;
321 			}
322 			g_expected_ns_test = (uint32_t)val;
323 			break;
324 		case 'r':
325 			if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) {
326 				fprintf(stderr, "Error parsing transport address\n");
327 				return 1;
328 			}
329 			break;
330 		case 't':
331 			g_touch_file = optarg;
332 			break;
333 		case 'L':
334 			rc = spdk_log_set_flag(optarg);
335 			if (rc < 0) {
336 				fprintf(stderr, "unknown flag\n");
337 				usage(argv[0]);
338 				exit(EXIT_FAILURE);
339 			}
340 #ifdef DEBUG
341 			spdk_log_set_print_level(SPDK_LOG_DEBUG);
342 #endif
343 			break;
344 		case 'T':
345 			g_enable_temp_test = 1;
346 			break;
347 		case 'H':
348 		default:
349 			usage(argv[0]);
350 			return 1;
351 		}
352 	}
353 
354 	return 0;
355 }
356 
357 static bool
358 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
359 	 struct spdk_nvme_ctrlr_opts *opts)
360 {
361 	printf("Attaching to %s\n", trid->traddr);
362 
363 	return true;
364 }
365 
366 static void
367 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
368 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
369 {
370 	struct dev *dev;
371 
372 	/* add to dev list */
373 	dev = &g_devs[g_num_devs++];
374 
375 	dev->ctrlr = ctrlr;
376 
377 	snprintf(dev->name, sizeof(dev->name), "%s",
378 		 trid->traddr);
379 
380 	printf("Attached to %s\n", dev->name);
381 
382 	dev->health_page = spdk_zmalloc(sizeof(*dev->health_page), 4096, NULL, SPDK_ENV_LCORE_ID_ANY,
383 					SPDK_MALLOC_DMA);
384 	if (dev->health_page == NULL) {
385 		printf("Allocation error (health page)\n");
386 		g_failed = 1;
387 	}
388 	dev->changed_ns_list = spdk_zmalloc(sizeof(*dev->changed_ns_list), 4096, NULL,
389 					    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
390 	if (dev->changed_ns_list == NULL) {
391 		printf("Allocation error (changed namespace list page)\n");
392 		g_failed = 1;
393 	}
394 }
395 
396 static void
397 get_feature_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl)
398 {
399 	struct dev *dev = cb_arg;
400 
401 	g_outstanding_commands--;
402 
403 	if (spdk_nvme_cpl_is_error(cpl)) {
404 		printf("%s: get number of queues failed\n", dev->name);
405 		g_failed = 1;
406 		return;
407 	}
408 
409 	if (g_aer_done < g_num_devs) {
410 		/*
411 		 * Resubmit Get Features command to continue filling admin queue
412 		 * while the test is running.
413 		 */
414 		get_feature_test(dev);
415 	}
416 }
417 
418 static void
419 get_feature_test(struct dev *dev)
420 {
421 	struct spdk_nvme_cmd cmd;
422 
423 	memset(&cmd, 0, sizeof(cmd));
424 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
425 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_NUMBER_OF_QUEUES;
426 	if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0,
427 					  get_feature_test_cb, dev) != 0) {
428 		printf("Failed to send Get Features command for dev=%p\n", dev);
429 		g_failed = 1;
430 		return;
431 	}
432 
433 	g_outstanding_commands++;
434 }
435 
436 static int
437 spdk_aer_temperature_test(void)
438 {
439 	struct dev *dev;
440 
441 	printf("Getting temperature thresholds of all controllers...\n");
442 	foreach_dev(dev) {
443 		/* Get the original temperature threshold */
444 		get_temp_threshold(dev);
445 	}
446 
447 	while (!g_failed && g_temperature_done < g_num_devs) {
448 		foreach_dev(dev) {
449 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
450 		}
451 	}
452 
453 	if (g_failed) {
454 		return g_failed;
455 	}
456 	g_temperature_done = 0;
457 	g_aer_done = 0;
458 
459 	/* Send admin commands to test admin queue wraparound while waiting for the AER */
460 	foreach_dev(dev) {
461 		get_feature_test(dev);
462 	}
463 
464 	if (g_failed) {
465 		return g_failed;
466 	}
467 
468 	printf("Waiting for all controllers to trigger AER...\n");
469 	foreach_dev(dev) {
470 		/* Set the temperature threshold to a low value */
471 		set_temp_threshold(dev, 200);
472 	}
473 
474 	if (g_failed) {
475 		return g_failed;
476 	}
477 
478 	while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
479 		foreach_dev(dev) {
480 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
481 		}
482 	}
483 
484 	if (g_failed) {
485 		return g_failed;
486 	}
487 
488 	return 0;
489 }
490 
491 static int
492 spdk_aer_changed_ns_test(void)
493 {
494 	struct dev *dev;
495 
496 	g_aer_done = 0;
497 
498 	printf("Starting namespce attribute notice tests for all controllers...\n");
499 
500 	foreach_dev(dev) {
501 		get_feature_test(dev);
502 	}
503 
504 	if (g_failed) {
505 		return g_failed;
506 	}
507 
508 	while (!g_failed && (g_aer_done < g_num_devs)) {
509 		foreach_dev(dev) {
510 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
511 		}
512 	}
513 
514 	if (g_failed) {
515 		return g_failed;
516 	}
517 
518 	return 0;
519 }
520 
521 int main(int argc, char **argv)
522 {
523 	struct dev		*dev;
524 	struct spdk_env_opts	opts;
525 	int			rc;
526 	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
527 
528 	rc = parse_args(argc, argv);
529 	if (rc != 0) {
530 		return rc;
531 	}
532 
533 	spdk_env_opts_init(&opts);
534 	opts.name = "aer";
535 	opts.core_mask = "0x1";
536 	if (spdk_env_init(&opts) < 0) {
537 		fprintf(stderr, "Unable to initialize SPDK env\n");
538 		return 1;
539 	}
540 
541 	printf("Asynchronous Event Request test\n");
542 
543 	if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
544 		fprintf(stderr, "spdk_nvme_probe() failed\n");
545 		return 1;
546 	}
547 
548 	if (g_failed) {
549 		goto done;
550 	}
551 
552 	printf("Registering asynchronous event callbacks...\n");
553 	foreach_dev(dev) {
554 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
555 	}
556 
557 	if (g_touch_file) {
558 		int fd;
559 
560 		fd = open(g_touch_file, O_CREAT | O_EXCL | O_RDWR, S_IFREG);
561 		if (fd == -1) {
562 			fprintf(stderr, "Could not touch %s (%s).\n", g_touch_file, strerror(errno));
563 			g_failed = true;
564 			goto done;
565 		}
566 		close(fd);
567 	}
568 
569 	/* AER temperature test */
570 	if (g_enable_temp_test) {
571 		if (spdk_aer_temperature_test()) {
572 			goto done;
573 		}
574 	}
575 
576 	/* AER changed namespace list test */
577 	if (g_expected_ns_test) {
578 		if (spdk_aer_changed_ns_test()) {
579 			goto done;
580 		}
581 	}
582 
583 	printf("Cleaning up...\n");
584 
585 	while (g_outstanding_commands) {
586 		foreach_dev(dev) {
587 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
588 		}
589 	}
590 
591 	/* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. */
592 	foreach_dev(dev) {
593 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL);
594 	}
595 
596 	foreach_dev(dev) {
597 		spdk_nvme_detach_async(dev->ctrlr, &detach_ctx);
598 	}
599 
600 	while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
601 		;
602 	}
603 
604 done:
605 	cleanup();
606 
607 	return g_failed;
608 }
609