xref: /spdk/test/nvme/aer/aer.c (revision cc6920a4763d4b9a43aa40583c8397d8f14fa100)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "spdk/log.h"
37 #include "spdk/nvme.h"
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40 
41 #define MAX_DEVS 64
42 
43 struct dev {
44 	struct spdk_nvme_ctrlr				*ctrlr;
45 	/* Expected changed NS ID state before AER */
46 	bool						ns_test_active;
47 	struct spdk_nvme_health_information_page	*health_page;
48 	uint32_t					orig_temp_threshold;
49 	char						name[SPDK_NVMF_TRADDR_MAX_LEN + 1];
50 };
51 
52 static void get_feature_test(struct dev *dev);
53 
54 static struct dev g_devs[MAX_DEVS];
55 static int g_num_devs = 0;
56 
57 #define foreach_dev(iter) \
58 	for (iter = g_devs; iter - g_devs < g_num_devs; iter++)
59 
60 static int g_outstanding_commands = 0;
61 static int g_aer_done = 0;
62 static int g_temperature_done = 0;
63 static int g_failed = 0;
64 static struct spdk_nvme_transport_id g_trid;
65 static char *g_touch_file;
66 
67 /* Enable AER temperature test */
68 static int g_enable_temp_test = 0;
69 /* Expected changed NS ID */
70 static uint32_t g_expected_ns_test = 0;
71 
72 static void
73 set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
74 {
75 	struct dev *dev = cb_arg;
76 
77 	g_outstanding_commands--;
78 
79 	if (spdk_nvme_cpl_is_error(cpl)) {
80 		printf("%s: set feature (temp threshold) failed\n", dev->name);
81 		g_failed = 1;
82 		return;
83 	}
84 
85 	/* Admin command completions are synchronized by the NVMe driver,
86 	 * so we don't need to do any special locking here. */
87 	g_temperature_done++;
88 }
89 
90 static int
91 set_temp_threshold(struct dev *dev, uint32_t temp)
92 {
93 	struct spdk_nvme_cmd cmd = {};
94 	int rc;
95 
96 	cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
97 	cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
98 	cmd.cdw11_bits.feat_temp_threshold.bits.tmpth = temp;
99 
100 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev);
101 	if (rc == 0) {
102 		g_outstanding_commands++;
103 	}
104 
105 	return rc;
106 }
107 
108 static void
109 get_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
110 {
111 	struct dev *dev = cb_arg;
112 
113 	g_outstanding_commands--;
114 
115 	if (spdk_nvme_cpl_is_error(cpl)) {
116 		printf("%s: get feature (temp threshold) failed\n", dev->name);
117 		g_failed = 1;
118 		return;
119 	}
120 
121 	dev->orig_temp_threshold = cpl->cdw0;
122 	printf("%s: original temperature threshold: %u Kelvin (%d Celsius)\n",
123 	       dev->name, dev->orig_temp_threshold, dev->orig_temp_threshold - 273);
124 
125 	g_temperature_done++;
126 }
127 
128 static int
129 get_temp_threshold(struct dev *dev)
130 {
131 	struct spdk_nvme_cmd cmd = {};
132 	int rc;
133 
134 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
135 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
136 
137 	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, get_temp_completion, dev);
138 	if (rc == 0) {
139 		g_outstanding_commands++;
140 	}
141 
142 	return rc;
143 }
144 
145 static void
146 print_health_page(struct dev *dev, struct spdk_nvme_health_information_page *hip)
147 {
148 	printf("%s: Current Temperature:         %u Kelvin (%d Celsius)\n",
149 	       dev->name, hip->temperature, hip->temperature - 273);
150 }
151 
152 static void
153 get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
154 {
155 	struct dev *dev = cb_arg;
156 
157 	g_outstanding_commands --;
158 
159 	if (spdk_nvme_cpl_is_error(cpl)) {
160 		printf("%s: get log page failed\n", dev->name);
161 		g_failed = 1;
162 		return;
163 	}
164 
165 	print_health_page(dev, dev->health_page);
166 	g_aer_done++;
167 }
168 
169 static int
170 get_health_log_page(struct dev *dev)
171 {
172 	int rc;
173 
174 	rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,
175 					      SPDK_NVME_GLOBAL_NS_TAG, dev->health_page, sizeof(*dev->health_page), 0,
176 					      get_health_log_page_completion, dev);
177 
178 	if (rc == 0) {
179 		g_outstanding_commands++;
180 	}
181 
182 	return rc;
183 }
184 
185 static void
186 get_ns_state_test(struct dev *dev, uint32_t nsid)
187 {
188 	bool new_ns_state;
189 
190 	new_ns_state = spdk_nvme_ctrlr_is_active_ns(dev->ctrlr, nsid);
191 	if (new_ns_state == dev->ns_test_active) {
192 		g_failed = 1;
193 	}
194 }
195 
196 static void
197 cleanup(void)
198 {
199 	struct dev *dev;
200 
201 	foreach_dev(dev) {
202 		if (dev->health_page) {
203 			spdk_free(dev->health_page);
204 		}
205 	}
206 }
207 
208 static void
209 aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
210 {
211 	uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
212 	struct dev *dev = arg;
213 
214 	if (spdk_nvme_cpl_is_error(cpl)) {
215 		printf("%s: AER failed\n", dev->name);
216 		g_failed = 1;
217 		return;
218 	}
219 
220 	printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);
221 
222 	if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) {
223 		/* Set the temperature threshold back to the original value
224 		 * so the AER doesn't trigger again.
225 		 */
226 		set_temp_threshold(dev, dev->orig_temp_threshold);
227 		get_health_log_page(dev);
228 	} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
229 		get_ns_state_test(dev, g_expected_ns_test);
230 		g_aer_done++;
231 	}
232 }
233 
234 static void
235 usage(const char *program_name)
236 {
237 	printf("%s [options]", program_name);
238 	printf("\n");
239 	printf("options:\n");
240 	printf(" -T         enable temperature tests\n");
241 	printf(" -n         expected Namespace attribute notice ID\n");
242 	printf(" -t <file>  touch specified file when ready to receive AER\n");
243 	printf(" -r trid    remote NVMe over Fabrics target address\n");
244 	printf("    Format: 'key:value [key:value] ...'\n");
245 	printf("    Keys:\n");
246 	printf("     trtype      Transport type (e.g. RDMA)\n");
247 	printf("     adrfam      Address family (e.g. IPv4, IPv6)\n");
248 	printf("     traddr      Transport address (e.g. 192.168.100.8)\n");
249 	printf("     trsvcid     Transport service identifier (e.g. 4420)\n");
250 	printf("     subnqn      Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
251 	printf("    Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n");
252 
253 	spdk_log_usage(stdout, "-L");
254 
255 	printf(" -v         verbose (enable warnings)\n");
256 	printf(" -H         show this usage\n");
257 }
258 
259 static int
260 parse_args(int argc, char **argv)
261 {
262 	int op, rc;
263 	long int val;
264 
265 	spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
266 	snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
267 
268 	while ((op = getopt(argc, argv, "n:r:t:HL:T")) != -1) {
269 		switch (op) {
270 		case 'n':
271 			val = spdk_strtol(optarg, 10);
272 			if (val < 0) {
273 				fprintf(stderr, "Invalid NS attribute notice ID\n");
274 				return val;
275 			}
276 			g_expected_ns_test = (uint32_t)val;
277 			break;
278 		case 'r':
279 			if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) {
280 				fprintf(stderr, "Error parsing transport address\n");
281 				return 1;
282 			}
283 			break;
284 		case 't':
285 			g_touch_file = optarg;
286 			break;
287 		case 'L':
288 			rc = spdk_log_set_flag(optarg);
289 			if (rc < 0) {
290 				fprintf(stderr, "unknown flag\n");
291 				usage(argv[0]);
292 				exit(EXIT_FAILURE);
293 			}
294 #ifdef DEBUG
295 			spdk_log_set_print_level(SPDK_LOG_DEBUG);
296 #endif
297 			break;
298 		case 'T':
299 			g_enable_temp_test = 1;
300 			break;
301 		case 'H':
302 			usage(argv[0]);
303 			exit(EXIT_SUCCESS);
304 		default:
305 			usage(argv[0]);
306 			return 1;
307 		}
308 	}
309 
310 	return 0;
311 }
312 
313 static bool
314 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
315 	 struct spdk_nvme_ctrlr_opts *opts)
316 {
317 	printf("Attaching to %s\n", trid->traddr);
318 
319 	return true;
320 }
321 
322 static void
323 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
324 	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
325 {
326 	struct dev *dev;
327 
328 	/* add to dev list */
329 	dev = &g_devs[g_num_devs++];
330 
331 	dev->ctrlr = ctrlr;
332 
333 	snprintf(dev->name, sizeof(dev->name), "%s",
334 		 trid->traddr);
335 
336 	printf("Attached to %s\n", dev->name);
337 
338 	dev->health_page = spdk_zmalloc(sizeof(*dev->health_page), 4096, NULL, SPDK_ENV_LCORE_ID_ANY,
339 					SPDK_MALLOC_DMA);
340 	if (dev->health_page == NULL) {
341 		printf("Allocation error (health page)\n");
342 		g_failed = 1;
343 	}
344 }
345 
346 static void
347 get_feature_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl)
348 {
349 	struct dev *dev = cb_arg;
350 
351 	g_outstanding_commands--;
352 
353 	if (spdk_nvme_cpl_is_error(cpl)) {
354 		printf("%s: get number of queues failed\n", dev->name);
355 		g_failed = 1;
356 		return;
357 	}
358 
359 	if (g_aer_done < g_num_devs) {
360 		/*
361 		 * Resubmit Get Features command to continue filling admin queue
362 		 * while the test is running.
363 		 */
364 		get_feature_test(dev);
365 	}
366 }
367 
368 static void
369 get_feature_test(struct dev *dev)
370 {
371 	struct spdk_nvme_cmd cmd;
372 
373 	memset(&cmd, 0, sizeof(cmd));
374 	cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
375 	cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_NUMBER_OF_QUEUES;
376 	if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0,
377 					  get_feature_test_cb, dev) != 0) {
378 		printf("Failed to send Get Features command for dev=%p\n", dev);
379 		g_failed = 1;
380 		return;
381 	}
382 
383 	g_outstanding_commands++;
384 }
385 
386 static int
387 spdk_aer_temperature_test(void)
388 {
389 	struct dev *dev;
390 
391 	printf("Getting temperature thresholds of all controllers...\n");
392 	foreach_dev(dev) {
393 		/* Get the original temperature threshold */
394 		get_temp_threshold(dev);
395 	}
396 
397 	while (!g_failed && g_temperature_done < g_num_devs) {
398 		foreach_dev(dev) {
399 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
400 		}
401 	}
402 
403 	if (g_failed) {
404 		return g_failed;
405 	}
406 	g_temperature_done = 0;
407 	g_aer_done = 0;
408 
409 	/* Send admin commands to test admin queue wraparound while waiting for the AER */
410 	foreach_dev(dev) {
411 		get_feature_test(dev);
412 	}
413 
414 	if (g_failed) {
415 		return g_failed;
416 	}
417 
418 	printf("Waiting for all controllers to trigger AER...\n");
419 	foreach_dev(dev) {
420 		/* Set the temperature threshold to a low value */
421 		set_temp_threshold(dev, 200);
422 	}
423 
424 	if (g_failed) {
425 		return g_failed;
426 	}
427 
428 	while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
429 		foreach_dev(dev) {
430 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
431 		}
432 	}
433 
434 	if (g_failed) {
435 		return g_failed;
436 	}
437 
438 	return 0;
439 }
440 
441 static int
442 spdk_aer_changed_ns_test(void)
443 {
444 	struct dev *dev;
445 
446 	g_aer_done = 0;
447 
448 	printf("Starting namespce attribute notice tests for all controllers...\n");
449 
450 	foreach_dev(dev) {
451 		get_feature_test(dev);
452 		dev->ns_test_active = spdk_nvme_ctrlr_is_active_ns(dev->ctrlr, g_expected_ns_test);
453 	}
454 
455 	if (g_failed) {
456 		return g_failed;
457 	}
458 
459 	while (!g_failed && (g_aer_done < g_num_devs)) {
460 		foreach_dev(dev) {
461 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
462 		}
463 	}
464 
465 	if (g_failed) {
466 		return g_failed;
467 	}
468 
469 	return 0;
470 }
471 
472 int main(int argc, char **argv)
473 {
474 	struct dev		*dev;
475 	struct spdk_env_opts	opts;
476 	int			rc;
477 	struct spdk_nvme_detach_ctx *detach_ctx = NULL;
478 
479 	rc = parse_args(argc, argv);
480 	if (rc != 0) {
481 		return rc;
482 	}
483 
484 	spdk_env_opts_init(&opts);
485 	opts.name = "aer";
486 	opts.core_mask = "0x1";
487 	if (spdk_env_init(&opts) < 0) {
488 		fprintf(stderr, "Unable to initialize SPDK env\n");
489 		return 1;
490 	}
491 
492 	printf("Asynchronous Event Request test\n");
493 
494 	if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
495 		fprintf(stderr, "spdk_nvme_probe() failed\n");
496 		return 1;
497 	}
498 
499 	if (g_failed) {
500 		goto done;
501 	}
502 
503 	printf("Registering asynchronous event callbacks...\n");
504 	foreach_dev(dev) {
505 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
506 	}
507 
508 	if (g_touch_file) {
509 		int fd;
510 
511 		fd = open(g_touch_file, O_CREAT | O_EXCL | O_RDWR, S_IFREG);
512 		if (fd == -1) {
513 			fprintf(stderr, "Could not touch %s (%s).\n", g_touch_file, strerror(errno));
514 			g_failed = true;
515 			goto done;
516 		}
517 		close(fd);
518 	}
519 
520 	/* AER temperature test */
521 	if (g_enable_temp_test) {
522 		if (spdk_aer_temperature_test()) {
523 			goto done;
524 		}
525 	}
526 
527 	/* AER changed namespace list test */
528 	if (g_expected_ns_test) {
529 		if (spdk_aer_changed_ns_test()) {
530 			goto done;
531 		}
532 	}
533 
534 	printf("Cleaning up...\n");
535 
536 	while (g_outstanding_commands) {
537 		foreach_dev(dev) {
538 			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
539 		}
540 	}
541 
542 	/* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. */
543 	foreach_dev(dev) {
544 		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL);
545 	}
546 
547 	foreach_dev(dev) {
548 		spdk_nvme_detach_async(dev->ctrlr, &detach_ctx);
549 	}
550 
551 	if (detach_ctx) {
552 		spdk_nvme_detach_poll(detach_ctx);
553 	}
554 
555 done:
556 	cleanup();
557 
558 	return g_failed;
559 }
560