xref: /netbsd-src/sys/dev/acpi/apei.c (revision ccd9df534e375a4366c5b55f23782053c7a98d82)
1 /*	$NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $	*/
2 
3 /*-
4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * APEI: ACPI Platform Error Interface
31  *
32  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33  *
34  * XXX dtrace probes
35  *
36  * XXX call _OSC appropriately to announce to the platform that we, the
37  * OSPM, support APEI
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $");
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 
46 #include <sys/atomic.h>
47 #include <sys/device.h>
48 #include <sys/module.h>
49 #include <sys/sysctl.h>
50 #include <sys/uuid.h>
51 
52 #include <dev/acpi/acpireg.h>
53 #include <dev/acpi/acpivar.h>
54 #include <dev/acpi/apei_bertvar.h>
55 #include <dev/acpi/apei_cper.h>
56 #include <dev/acpi/apei_einjvar.h>
57 #include <dev/acpi/apei_erstvar.h>
58 #include <dev/acpi/apei_hestvar.h>
59 #include <dev/acpi/apei_interp.h>
60 #include <dev/acpi/apeivar.h>
61 
62 #define	_COMPONENT	ACPI_RESOURCE_COMPONENT
63 ACPI_MODULE_NAME	("apei")
64 
65 static int apei_match(device_t, cfdata_t, void *);
66 static void apei_attach(device_t, device_t, void *);
67 static int apei_detach(device_t, int);
68 
69 static void apei_get_tables(struct apei_tab *);
70 static void apei_put_tables(struct apei_tab *);
71 
72 static void apei_identify(struct apei_softc *, const char *,
73     const ACPI_TABLE_HEADER *);
74 
75 CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
76     apei_match, apei_attach, apei_detach, NULL);
77 
78 static int
79 apei_match(device_t parent, cfdata_t match, void *aux)
80 {
81 	struct apei_tab tab;
82 	int prio = 0;
83 
84 	/*
85 	 * If we have any of the APEI tables, match.
86 	 */
87 	apei_get_tables(&tab);
88 	if (tab.bert || tab.einj || tab.erst || tab.hest)
89 		prio = 1;
90 	apei_put_tables(&tab);
91 
92 	return prio;
93 }
94 
95 static void
96 apei_attach(device_t parent, device_t self, void *aux)
97 {
98 	struct apei_softc *sc = device_private(self);
99 	const struct sysctlnode *sysctl_hw_acpi;
100 	int error;
101 
102 	aprint_naive("\n");
103 	aprint_normal(": ACPI Platform Error Interface\n");
104 
105 	pmf_device_register(self, NULL, NULL);
106 
107 	sc->sc_dev = self;
108 	apei_get_tables(&sc->sc_tab);
109 
110 	/*
111 	 * Get the sysctl hw.acpi node.  This should already be created
112 	 * but I don't see an easy way to get at it.  If this fails,
113 	 * something is seriously wrong, so let's stop here.
114 	 */
115 	error = sysctl_createv(&sc->sc_sysctllog, 0,
116 	    NULL, &sysctl_hw_acpi, 0,
117 	    CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
118 	    CTL_HW, CTL_CREATE, CTL_EOL);
119 	if (error) {
120 		aprint_error_dev(sc->sc_dev,
121 		    "failed to create sysctl hw.acpi: %d\n", error);
122 		return;
123 	}
124 
125 	/*
126 	 * Create sysctl hw.acpi.apei.
127 	 */
128 	error = sysctl_createv(&sc->sc_sysctllog, 0,
129 	    &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
130 	    CTLTYPE_NODE, "apei",
131 	    SYSCTL_DESCR("ACPI Platform Error Interface"),
132 	    NULL, 0, NULL, 0,
133 	    CTL_CREATE, CTL_EOL);
134 	if (error) {
135 		aprint_error_dev(sc->sc_dev,
136 		    "failed to create sysctl hw.acpi.apei: %d\n", error);
137 		return;
138 	}
139 
140 	/*
141 	 * Set up BERT, EINJ, ERST, and HEST.
142 	 */
143 	if (sc->sc_tab.bert) {
144 		apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
145 		apei_bert_attach(sc);
146 	}
147 	if (sc->sc_tab.einj) {
148 		apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
149 		apei_einj_attach(sc);
150 	}
151 	if (sc->sc_tab.erst) {
152 		apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
153 		apei_erst_attach(sc);
154 	}
155 	if (sc->sc_tab.hest) {
156 		apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
157 		apei_hest_attach(sc);
158 	}
159 }
160 
161 static int
162 apei_detach(device_t self, int flags)
163 {
164 	struct apei_softc *sc = device_private(self);
165 	int error;
166 
167 	/*
168 	 * Detach children.  We don't currently have any but this is
169 	 * harmless without children and mandatory if we ever sprouted
170 	 * them, so let's just leave it here for good measure.
171 	 *
172 	 * After this point, we are committed to detaching; failure is
173 	 * forbidden.
174 	 */
175 	error = config_detach_children(self, flags);
176 	if (error)
177 		return error;
178 
179 	/*
180 	 * Tear down all the sysctl nodes first, before the software
181 	 * state backing them goes away.
182 	 */
183 	sysctl_teardown(&sc->sc_sysctllog);
184 	sc->sc_sysctlroot = NULL;
185 
186 	/*
187 	 * Detach the software state for the APEI tables.
188 	 */
189 	if (sc->sc_tab.hest)
190 		apei_hest_detach(sc);
191 	if (sc->sc_tab.erst)
192 		apei_erst_detach(sc);
193 	if (sc->sc_tab.einj)
194 		apei_einj_detach(sc);
195 	if (sc->sc_tab.bert)
196 		apei_bert_detach(sc);
197 
198 	/*
199 	 * Release the APEI tables and we're done.
200 	 */
201 	apei_put_tables(&sc->sc_tab);
202 	pmf_device_deregister(self);
203 	return 0;
204 }
205 
206 /*
207  * apei_get_tables(tab)
208  *
209  *	Get references to whichever APEI-related tables -- BERT, EINJ,
210  *	ERST, HEST -- are available in the system.
211  */
212 static void
213 apei_get_tables(struct apei_tab *tab)
214 {
215 	ACPI_STATUS rv;
216 
217 	/*
218 	 * Probe the BERT -- Boot Error Record Table.
219 	 */
220 	rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
221 	if (ACPI_FAILURE(rv))
222 		tab->bert = NULL;
223 
224 	/*
225 	 * Probe the EINJ -- Error Injection Table.
226 	 */
227 	rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
228 	if (ACPI_FAILURE(rv))
229 		tab->einj = NULL;
230 
231 	/*
232 	 * Probe the ERST -- Error Record Serialization Table.
233 	 */
234 	rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
235 	if (ACPI_FAILURE(rv))
236 		tab->erst = NULL;
237 
238 	/*
239 	 * Probe the HEST -- Hardware Error Source Table.
240 	 */
241 	rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
242 	if (ACPI_FAILURE(rv))
243 		tab->hest = NULL;
244 }
245 
246 /*
247  * apei_put_tables(tab)
248  *
249  *	Release the tables acquired by apei_get_tables.
250  */
251 static void
252 apei_put_tables(struct apei_tab *tab)
253 {
254 
255 	if (tab->bert != NULL) {
256 		AcpiPutTable(&tab->bert->Header);
257 		tab->bert = NULL;
258 	}
259 	if (tab->einj != NULL) {
260 		AcpiPutTable(&tab->einj->Header);
261 		tab->einj = NULL;
262 	}
263 	if (tab->erst != NULL) {
264 		AcpiPutTable(&tab->erst->Header);
265 		tab->erst = NULL;
266 	}
267 	if (tab->hest != NULL) {
268 		AcpiPutTable(&tab->hest->Header);
269 		tab->hest = NULL;
270 	}
271 }
272 
273 /*
274  * apei_identify(sc, name, header)
275  *
276  *	Identify the APEI-related table header for dmesg.
277  */
278 static void
279 apei_identify(struct apei_softc *sc, const char *name,
280     const ACPI_TABLE_HEADER *h)
281 {
282 
283 	aprint_normal_dev(sc->sc_dev, "%s:"
284 	    " OemId <%6.6s,%8.8s,%08x>"
285 	    " AslId <%4.4s,%08x>\n",
286 	    name,
287 	    h->OemId, h->OemTableId, h->OemRevision,
288 	    h->AslCompilerId, h->AslCompilerRevision);
289 }
290 
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record GUID stored
 *	at buf, as found in an ACPI table, into a struct uuid.  The
 *	bytes are decoded with uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{
	uuid_dec_le(buf, uuid);
}
303 
304 /*
305  * apei_format_guid(uuid, s)
306  *
307  *	Format a UUID as a string.  This uses C initializer notation,
308  *	not UUID notation, in order to match the text in the UEFI
309  *	specification.
310  */
311 static void
312 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
313 {
314 
315 	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
316 	    "0x%02x%02x,"
317 	    "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
318 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
319 	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved,
320 	    uuid->node[0], uuid->node[1], uuid->node[2],
321 	    uuid->node[3], uuid->node[4], uuid->node[5]);
322 }
323 
324 /*
325  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
326  */
327 
328 static const char *const cper_memory_error_type[] = {
329 #define	F(LN, SN, V)	[LN] = #SN,
330 	CPER_MEMORY_ERROR_TYPES(F)
331 #undef	F
332 };
333 
/*
 * Names for the Error Severity field of a Generic Error Status Block,
 * indexed by the numeric severity 0..3.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica constants ACPI_HEST_GEN_ERROR_* seem to have the same
 * values, but acpica describes them as applying to Generic Error Data
 * Entries, not Generic Error Status Blocks, so plain numbers are used
 * here.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",		/* 0 */
	"fatal",		/* 1 */
	"corrected",		/* 2 */
	"none",			/* 3 */
};
347 
348 /*
349  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
350  */
351 static const char *const apei_gede_severity[] = {
352 	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
353 	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
354 	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
355 	[ACPI_HEST_GEN_ERROR_NONE] = "none",
356 };
357 
358 /*
359  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
360  */
361 static const struct uuid CPER_MEMORY_ERROR_SECTION =
362     {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
363 
364 static void
365 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
366     size_t len, const char *ctx)
367 {
368 	const struct cper_memory_error *ME = buf;
369 	char bitbuf[1024];
370 
371 	snprintb(bitbuf, sizeof(bitbuf),
372 	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
373 	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
374 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
375 		/*
376 		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
377 		 */
378 		/* XXX define this format somewhere */
379 		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
380 		    "f\010\010"	"ErrorType\0"
381 			"=\001"		"ERR_INTERNAL\0"
382 			"=\004"		"ERR_MEM\0"
383 			"=\005"		"ERR_TLB\0"
384 			"=\006"		"ERR_CACHE\0"
385 			"=\007"		"ERR_FUNCTION\0"
386 			"=\010"		"ERR_SELFTEST\0"
387 			"=\011"		"ERR_FLOW\0"
388 			"=\020"		"ERR_BUS\0"
389 			"=\021"		"ERR_MAP\0"
390 			"=\022"		"ERR_IMPROPER\0"
391 			"=\023"		"ERR_UNIMPL\0"
392 			"=\024"		"ERR_LOL\0"
393 			"=\025"		"ERR_RESPONSE\0"
394 			"=\026"		"ERR_PARITY\0"
395 			"=\027"		"ERR_PROTOCOL\0"
396 			"=\030"		"ERR_ERROR\0"
397 			"=\031"		"ERR_TIMEOUT\0"
398 			"=\032"		"ERR_POISONED\0"
399 		    "b\020"	"AddressError\0"
400 		    "b\021"	"ControlError\0"
401 		    "b\022"	"DataError\0"
402 		    "b\023"	"ResponderDetected\0"
403 		    "b\024"	"RequesterDetected\0"
404 		    "b\025"	"FirstError\0"
405 		    "b\026"	"Overflow\0"
406 		    "\0", ME->ErrorStatus);
407 		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
408 	}
409 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
410 		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
411 		    ctx, ME->PhysicalAddress);
412 	}
413 	if (ME->ValidationBits &
414 	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
415 		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
416 		    "\n", ctx, ME->PhysicalAddressMask);
417 	}
418 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
419 		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
420 		    ME->Node);
421 	}
422 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
423 		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
424 		    ME->Card);
425 	}
426 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
427 		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
428 		    ME->Module);
429 	}
430 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
431 		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
432 		    ME->Bank);
433 	}
434 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
435 		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
436 		    ME->Device);
437 	}
438 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
439 		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
440 		    ME->Row);
441 	}
442 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
443 		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
444 		    ME->Column);
445 	}
446 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
447 		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
448 		    ctx, ME->BitPosition);
449 	}
450 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
451 		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
452 		    ctx, ME->RequestorId);
453 	}
454 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
455 		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
456 		    ctx, ME->ResponderId);
457 	}
458 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
459 		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
460 		    ctx, ME->TargetId);
461 	}
462 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
463 		const uint8_t t = ME->MemoryErrorType;
464 		const char *n = t < __arraycount(cper_memory_error_type)
465 		    ? cper_memory_error_type[t] : NULL;
466 
467 		if (n) {
468 			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
469 			    " (%s)\n", ctx, t, n);
470 		} else {
471 			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
472 			    ctx, t);
473 		}
474 	}
475 }
476 
477 /*
478  * apei_cper_reports
479  *
480  *	Table of known Common Platform Error Record types, symbolic
481  *	names, minimum data lengths, and functions to report them.
482  *
483  *	The section types and corresponding section layouts are listed
484  *	at:
485  *
486  *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
487  */
488 static const struct apei_cper_report {
489 	const char *name;
490 	const struct uuid *type;
491 	size_t minlength;
492 	void (*func)(struct apei_softc *, const void *, size_t, const char *);
493 } apei_cper_reports[] = {
494 	{ "memory", &CPER_MEMORY_ERROR_SECTION,
495 	  sizeof(struct cper_memory_error),
496 	  apei_cper_memory_error_report },
497 };
498 
499 /*
500  * apei_gede_report_header(sc, gede, ctx, &headerlen, &report)
501  *
502  *	Report the header of the ith Generic Error Data Entry in the
503  *	given context.
504  *
505  *	Return the actual length of the header in headerlen, or 0 if
506  *	not known because the revision isn't recognized.
507  *
508  *	Return the report type in report, or NULL if not known because
509  *	the section type isn't recognized.
510  */
511 static void
512 apei_gede_report_header(struct apei_softc *sc,
513     const ACPI_HEST_GENERIC_DATA *gede, const char *ctx,
514     size_t *headerlenp, const struct apei_cper_report **reportp)
515 {
516 	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
517 	struct uuid sectype;
518 	char guidstr[69];
519 	char buf[128];
520 	unsigned i;
521 
522 	/*
523 	 * Print the section type as a C initializer.  It would be
524 	 * prettier to use standard hyphenated UUID notation, but that
525 	 * notation is slightly ambiguous here (two octets could be
526 	 * written either way, depending on Microsoft convention --
527 	 * which influenced ACPI and UEFI -- or internet convention),
528 	 * and the UEFI spec writes the C initializer notation, so this
529 	 * makes it easier to search for.
530 	 *
531 	 * Also print out a symbolic name, if we know it.
532 	 */
533 	apei_cper_guid_dec(gede->SectionType, &sectype);
534 	apei_format_guid(&sectype, guidstr);
535 	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
536 		const struct apei_cper_report *const report =
537 		    &apei_cper_reports[i];
538 
539 		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
540 			continue;
541 		device_printf(sc->sc_dev, "%s: SectionType=%s (%s error)\n",
542 		    ctx, guidstr, report->name);
543 		*reportp = report;
544 		break;
545 	}
546 	if (i == __arraycount(apei_cper_reports)) {
547 		device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
548 		    guidstr);
549 		*reportp = NULL;
550 	}
551 
552 	/*
553 	 * Print the numeric severity and, if we have it, a symbolic
554 	 * name for it.
555 	 */
556 	device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", ctx,
557 	    gede->ErrorSeverity,
558 	    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
559 		? apei_gede_severity[gede->ErrorSeverity]
560 		: "unknown"));
561 
562 	/*
563 	 * The Revision may not often be useful, but this is only ever
564 	 * shown at the time of a hardware error report, not something
565 	 * you can glean at your convenience with acpidump.  So print
566 	 * it anyway.
567 	 */
568 	device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
569 	    gede->Revision);
570 
571 	/*
572 	 * Don't touch anything past the Revision until we've
573 	 * determined we understand it.  Return the header length to
574 	 * the caller, or return zero -- and stop here -- if we don't
575 	 * know what the actual header length is.
576 	 */
577 	if (gede->Revision < 0x0300) {
578 		*headerlenp = sizeof(*gede);
579 	} else if (gede->Revision < 0x0400) {
580 		*headerlenp = sizeof(*gede_v3);
581 	} else {
582 		*headerlenp = 0;
583 		return;
584 	}
585 
586 	/*
587 	 * Print the validation bits at debug level.  Only really
588 	 * helpful if there are bits we _don't_ know about.
589 	 */
590 	/* XXX define this format somewhere */
591 	snprintb(buf, sizeof(buf), "\177\020"
592 	    "b\000"	"FRU_ID\0"
593 	    "b\001"	"FRU_TEXT\0" /* `FRU string', sometimes */
594 	    "b\002"	"TIMESTAMP\0"
595 	    "\0", gede->ValidationBits);
596 	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf);
597 
598 	/*
599 	 * Print the CPER section flags.
600 	 */
601 	snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags);
602 	device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
603 
604 	/*
605 	 * The ErrorDataLength is unlikely to be useful for the log, so
606 	 * print it at debug level only.
607 	 */
608 	aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIu32"\n",
609 	    ctx, gede->ErrorDataLength);
610 
611 	/*
612 	 * Print the FRU Id and text, if available.
613 	 */
614 	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) {
615 		struct uuid fruid;
616 
617 		apei_cper_guid_dec(gede->FruId, &fruid);
618 		apei_format_guid(&fruid, guidstr);
619 		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
620 	}
621 	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) {
622 		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
623 		    ctx, gede->FruText);
624 	}
625 
626 	/*
627 	 * Print the timestamp, if available by the revision number and
628 	 * the validation bits.
629 	 */
630 	if (gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
631 	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
632 		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
633 		const uint8_t s = t[0];
634 		const uint8_t m = t[1];
635 		const uint8_t h = t[2];
636 		const uint8_t f = t[3];
637 		const uint8_t D = t[4];
638 		const uint8_t M = t[5];
639 		const uint8_t Y = t[6];
640 		const uint8_t C = t[7];
641 
642 		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
643 		    " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
644 		    ctx, gede_v3->TimeStamp,
645 		    C,Y, M, D, h,m,s,
646 		    f & __BIT(0) ? " (event time)" : " (collect time)");
647 	}
648 }
649 
650 /*
651  * apei_gesb_report(sc, gesb, size, ctx)
652  *
653  *	Check a Generic Error Status Block, of at most the specified
654  *	size in bytes, and report any errors in it.  Return the 32-bit
655  *	Block Status in case the caller needs it to acknowledge the
656  *	report to firmware.
657  */
658 uint32_t
659 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
660     size_t size, const char *ctx, bool *fatalp)
661 {
662 	uint32_t status, unknownstatus, severity, nentries, i;
663 	uint32_t datalen, rawdatalen;
664 	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
665 	const unsigned char *rawdata;
666 	char statusbuf[128];
667 	bool fatal = false;
668 
669 	/*
670 	 * Verify the buffer is large enough for a Generic Error Status
671 	 * Block before we try to touch anything in it.
672 	 */
673 	if (size < sizeof(*gesb)) {
674 		device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n",
675 		    ctx, size, sizeof(*gesb));
676 		status = 0;
677 		goto out;
678 	}
679 	size -= sizeof(*gesb);
680 
681 	/*
682 	 * Load the status.  Access ordering rules are unclear in the
683 	 * ACPI specification; I'm guessing that load-acquire of the
684 	 * block status is a good idea before any other access to the
685 	 * GESB.
686 	 */
687 	status = atomic_load_acquire(&gesb->BlockStatus);
688 
689 	/*
690 	 * If there are no status bits set, the rest of the GESB is
691 	 * garbage, so stop here.
692 	 */
693 	if (status == 0) {
694 		/* XXX dtrace */
695 		/* XXX DPRINTF */
696 		goto out;
697 	}
698 
699 	/* XXX define this format somewhere */
700 	snprintb(statusbuf, sizeof(statusbuf), "\177\020"
701 	    "b\000"	"UE\0"
702 	    "b\001"	"CE\0"
703 	    "b\002"	"MULTI_UE\0"
704 	    "b\003"	"MULTI_CE\0"
705 	    "f\004\010"	"GEDE_COUNT\0"
706 	    "\0", status);
707 
708 	/*
709 	 * Print a message to the console and dmesg about the severity
710 	 * of the error.
711 	 */
712 	severity = gesb->ErrorSeverity;
713 	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
714 	if (severity < __arraycount(apei_gesb_severity)) {
715 		device_printf(sc->sc_dev, "%s reported hardware error:"
716 		    " severity=%s nentries=%u status=%s\n",
717 		    ctx, apei_gesb_severity[severity], nentries, statusbuf);
718 	} else {
719 		device_printf(sc->sc_dev, "%s reported error:"
720 		    " severity=%"PRIu32" nentries=%u status=%s\n",
721 		    ctx, severity, nentries, statusbuf);
722 	}
723 
724 	/*
725 	 * Make a determination about whether the error is fatal.
726 	 *
727 	 * XXX Currently we don't have any mechanism to recover from
728 	 * uncorrectable but recoverable errors, so we treat those --
729 	 * and anything else we don't recognize -- as fatal.
730 	 */
731 	switch (severity) {
732 	case ACPI_HEST_GEN_ERROR_CORRECTED:
733 	case ACPI_HEST_GEN_ERROR_NONE:
734 		fatal = false;
735 		break;
736 	case ACPI_HEST_GEN_ERROR_FATAL:
737 	case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
738 	default:
739 		fatal = true;
740 		break;
741 	}
742 
743 	/*
744 	 * Clear the bits we know about to warn if there's anything
745 	 * left we don't understand.
746 	 */
747 	unknownstatus = status;
748 	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
749 	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
750 	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
751 	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
752 	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
753 	if (unknownstatus != 0) {
754 		/* XXX dtrace */
755 		/* XXX rate-limit? */
756 		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
757 		    " 0x%"PRIx32"\n", ctx, unknownstatus);
758 	}
759 
760 	/*
761 	 * Advance past the Generic Error Status Block (GESB) header to
762 	 * the Generic Error Data Entries (GEDEs).
763 	 */
764 	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
765 
766 	/*
767 	 * Verify that the data length (GEDEs) fits within the size.
768 	 * If not, truncate the GEDEs.
769 	 */
770 	datalen = gesb->DataLength;
771 	if (size < datalen) {
772 		device_printf(sc->sc_dev, "%s:"
773 		    " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n",
774 		    ctx, size, datalen);
775 		datalen = size;
776 	}
777 	size -= datalen;
778 
779 	/*
780 	 * Report each of the Generic Error Data Entries.
781 	 */
782 	for (i = 0; i < nentries; i++) {
783 		size_t headerlen;
784 		const struct apei_cper_report *report;
785 		char subctx[128];
786 
787 		/*
788 		 * Format a subcontext to show this numbered entry of
789 		 * the GESB.
790 		 */
791 		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
792 
793 		/*
794 		 * If the remaining GESB data length isn't enough for a
795 		 * GEDE header, stop here.
796 		 */
797 		if (datalen < sizeof(*gede)) {
798 			device_printf(sc->sc_dev, "%s:"
799 			    " truncated GEDE: %"PRIu32" < %zu bytes\n",
800 			    subctx, datalen, sizeof(*gede));
801 			break;
802 		}
803 
804 		/*
805 		 * Print the GEDE header and get the full length (may
806 		 * vary from revision to revision of the GEDE) and the
807 		 * CPER report function if possible.
808 		 */
809 		apei_gede_report_header(sc, gede, subctx,
810 		    &headerlen, &report);
811 
812 		/*
813 		 * If we don't know the header length because of an
814 		 * unfamiliar revision, stop here.
815 		 */
816 		if (headerlen == 0) {
817 			device_printf(sc->sc_dev, "%s:"
818 			    " unknown revision: 0x%"PRIx16"\n",
819 			    subctx, gede->Revision);
820 			break;
821 		}
822 
823 		/*
824 		 * Stop here if what we mapped is too small for the
825 		 * error data length.
826 		 */
827 		datalen -= headerlen;
828 		if (datalen < gede->ErrorDataLength) {
829 			device_printf(sc->sc_dev, "%s: truncated GEDE payload:"
830 			    " %"PRIu32" < %"PRIu32" bytes\n",
831 			    subctx, datalen, gede->ErrorDataLength);
832 			break;
833 		}
834 
835 		/*
836 		 * Report the Common Platform Error Record appendix to
837 		 * this Generic Error Data Entry.
838 		 */
839 		if (report == NULL) {
840 			device_printf(sc->sc_dev, "%s: [unknown type]\n", ctx);
841 		} else {
842 			(*report->func)(sc, (const char *)gede + headerlen,
843 			    gede->ErrorDataLength, subctx);
844 		}
845 
846 		/*
847 		 * Advance past the GEDE header and CPER data to the
848 		 * next GEDE.
849 		 */
850 		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
851 		    + headerlen + gede->ErrorDataLength);
852 	}
853 
854 	/*
855 	 * Advance past the Generic Error Data Entries (GEDEs) to the
856 	 * raw error data.
857 	 *
858 	 * XXX Provide Max Raw Data Length as a parameter, as found in
859 	 * various HEST entry types.
860 	 */
861 	rawdata = (const unsigned char *)gede0 + datalen;
862 
863 	/*
864 	 * Verify that the raw data length fits within the size.  If
865 	 * not, truncate the raw data.
866 	 */
867 	rawdatalen = gesb->RawDataLength;
868 	if (size < rawdatalen) {
869 		device_printf(sc->sc_dev, "%s:"
870 		    " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n",
871 		    ctx, size, rawdatalen);
872 		rawdatalen = size;
873 	}
874 	size -= rawdatalen;
875 
876 	/*
877 	 * Hexdump the raw data, if any.
878 	 */
879 	if (rawdatalen > 0) {
880 		char devctx[128];
881 
882 		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
883 		    device_xname(sc->sc_dev), ctx);
884 		hexdump(printf, devctx, rawdata, rawdatalen);
885 	}
886 
887 	/*
888 	 * If there's anything left after the raw data, warn.
889 	 */
890 	if (size > 0) {
891 		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
892 		    ctx, size);
893 	}
894 
895 	/*
896 	 * Return the status so the caller can ack it, and tell the
897 	 * caller whether this error is fatal.
898 	 */
899 out:	*fatalp = fatal;
900 	return status;
901 }
902 
903 MODULE(MODULE_CLASS_DRIVER, apei, NULL);
904 
905 #ifdef _MODULE
906 #include "ioconf.c"
907 #endif
908 
909 static int
910 apei_modcmd(modcmd_t cmd, void *opaque)
911 {
912 	int error = 0;
913 
914 	switch (cmd) {
915 	case MODULE_CMD_INIT:
916 #ifdef _MODULE
917 		error = config_init_component(cfdriver_ioconf_apei,
918 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
919 #endif
920 		return error;
921 	case MODULE_CMD_FINI:
922 #ifdef _MODULE
923 		error = config_fini_component(cfdriver_ioconf_apei,
924 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
925 #endif
926 		return error;
927 	default:
928 		return ENOTTY;
929 	}
930 }
931