xref: /netbsd-src/sys/dev/acpi/apei.c (revision 6a2ddc3ce21a1d518c74fc309686ac5f8ffde59b)
1 /*	$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * APEI: ACPI Platform Error Interface
31  *
32  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33  *
34  * XXX dtrace probes
35  *
36  * XXX call _OSC appropriately to announce to the platform that we, the
37  * OSPM, support APEI
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $");
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 
46 #include <sys/atomic.h>
47 #include <sys/endian.h>
48 #include <sys/device.h>
49 #include <sys/module.h>
50 #include <sys/sysctl.h>
51 #include <sys/uuid.h>
52 
53 #include <dev/acpi/acpireg.h>
54 #include <dev/acpi/acpivar.h>
55 #include <dev/acpi/apei_bertvar.h>
56 #include <dev/acpi/apei_cper.h>
57 #include <dev/acpi/apei_einjvar.h>
58 #include <dev/acpi/apei_erstvar.h>
59 #include <dev/acpi/apei_hestvar.h>
60 #include <dev/acpi/apei_interp.h>
61 #include <dev/acpi/apeivar.h>
62 #include <dev/pci/pcireg.h>
63 
/* ACPICA debug component and module name for this file. */
#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
ACPI_MODULE_NAME	("apei")

/* Autoconf entry points. */
static int apei_match(device_t, cfdata_t, void *);
static void apei_attach(device_t, device_t, void *);
static int apei_detach(device_t, int);

/* Acquire/release references to the BERT, EINJ, ERST, and HEST tables. */
static void apei_get_tables(struct apei_tab *);
static void apei_put_tables(struct apei_tab *);

/* Print one table's OEM and ASL compiler identification to dmesg. */
static void apei_identify(struct apei_softc *, const char *,
    const ACPI_TABLE_HEADER *);

/* Autoconf attachment: softc size plus match/attach/detach routines. */
CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
    apei_match, apei_attach, apei_detach, NULL);
79 
80 static int
81 apei_match(device_t parent, cfdata_t match, void *aux)
82 {
83 	struct apei_tab tab;
84 	int prio = 0;
85 
86 	/*
87 	 * If we have any of the APEI tables, match.
88 	 */
89 	apei_get_tables(&tab);
90 	if (tab.bert || tab.einj || tab.erst || tab.hest)
91 		prio = 1;
92 	apei_put_tables(&tab);
93 
94 	return prio;
95 }
96 
97 static void
98 apei_attach(device_t parent, device_t self, void *aux)
99 {
100 	struct apei_softc *sc = device_private(self);
101 	const struct sysctlnode *sysctl_hw_acpi;
102 	int error;
103 
104 	aprint_naive("\n");
105 	aprint_normal(": ACPI Platform Error Interface\n");
106 
107 	pmf_device_register(self, NULL, NULL);
108 
109 	sc->sc_dev = self;
110 	apei_get_tables(&sc->sc_tab);
111 
112 	/*
113 	 * Get the sysctl hw.acpi node.  This should already be created
114 	 * but I don't see an easy way to get at it.  If this fails,
115 	 * something is seriously wrong, so let's stop here.
116 	 */
117 	error = sysctl_createv(&sc->sc_sysctllog, 0,
118 	    NULL, &sysctl_hw_acpi, 0,
119 	    CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
120 	    CTL_HW, CTL_CREATE, CTL_EOL);
121 	if (error) {
122 		aprint_error_dev(sc->sc_dev,
123 		    "failed to create sysctl hw.acpi: %d\n", error);
124 		return;
125 	}
126 
127 	/*
128 	 * Create sysctl hw.acpi.apei.
129 	 */
130 	error = sysctl_createv(&sc->sc_sysctllog, 0,
131 	    &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
132 	    CTLTYPE_NODE, "apei",
133 	    SYSCTL_DESCR("ACPI Platform Error Interface"),
134 	    NULL, 0, NULL, 0,
135 	    CTL_CREATE, CTL_EOL);
136 	if (error) {
137 		aprint_error_dev(sc->sc_dev,
138 		    "failed to create sysctl hw.acpi.apei: %d\n", error);
139 		return;
140 	}
141 
142 	/*
143 	 * Set up BERT, EINJ, ERST, and HEST.
144 	 */
145 	if (sc->sc_tab.bert) {
146 		apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
147 		apei_bert_attach(sc);
148 	}
149 	if (sc->sc_tab.einj) {
150 		apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
151 		apei_einj_attach(sc);
152 	}
153 	if (sc->sc_tab.erst) {
154 		apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
155 		apei_erst_attach(sc);
156 	}
157 	if (sc->sc_tab.hest) {
158 		apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
159 		apei_hest_attach(sc);
160 	}
161 }
162 
163 static int
164 apei_detach(device_t self, int flags)
165 {
166 	struct apei_softc *sc = device_private(self);
167 	int error;
168 
169 	/*
170 	 * Detach children.  We don't currently have any but this is
171 	 * harmless without children and mandatory if we ever sprouted
172 	 * them, so let's just leave it here for good measure.
173 	 *
174 	 * After this point, we are committed to detaching; failure is
175 	 * forbidden.
176 	 */
177 	error = config_detach_children(self, flags);
178 	if (error)
179 		return error;
180 
181 	/*
182 	 * Tear down all the sysctl nodes first, before the software
183 	 * state backing them goes away.
184 	 */
185 	sysctl_teardown(&sc->sc_sysctllog);
186 	sc->sc_sysctlroot = NULL;
187 
188 	/*
189 	 * Detach the software state for the APEI tables.
190 	 */
191 	if (sc->sc_tab.hest)
192 		apei_hest_detach(sc);
193 	if (sc->sc_tab.erst)
194 		apei_erst_detach(sc);
195 	if (sc->sc_tab.einj)
196 		apei_einj_detach(sc);
197 	if (sc->sc_tab.bert)
198 		apei_bert_detach(sc);
199 
200 	/*
201 	 * Release the APEI tables and we're done.
202 	 */
203 	apei_put_tables(&sc->sc_tab);
204 	pmf_device_deregister(self);
205 	return 0;
206 }
207 
208 /*
209  * apei_get_tables(tab)
210  *
211  *	Get references to whichever APEI-related tables -- BERT, EINJ,
212  *	ERST, HEST -- are available in the system.
213  */
214 static void
215 apei_get_tables(struct apei_tab *tab)
216 {
217 	ACPI_STATUS rv;
218 
219 	/*
220 	 * Probe the BERT -- Boot Error Record Table.
221 	 */
222 	rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
223 	if (ACPI_FAILURE(rv))
224 		tab->bert = NULL;
225 
226 	/*
227 	 * Probe the EINJ -- Error Injection Table.
228 	 */
229 	rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
230 	if (ACPI_FAILURE(rv))
231 		tab->einj = NULL;
232 
233 	/*
234 	 * Probe the ERST -- Error Record Serialization Table.
235 	 */
236 	rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
237 	if (ACPI_FAILURE(rv))
238 		tab->erst = NULL;
239 
240 	/*
241 	 * Probe the HEST -- Hardware Error Source Table.
242 	 */
243 	rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
244 	if (ACPI_FAILURE(rv))
245 		tab->hest = NULL;
246 }
247 
248 /*
249  * apei_put_tables(tab)
250  *
251  *	Release the tables acquired by apei_get_tables.
252  */
253 static void
254 apei_put_tables(struct apei_tab *tab)
255 {
256 
257 	if (tab->bert != NULL) {
258 		AcpiPutTable(&tab->bert->Header);
259 		tab->bert = NULL;
260 	}
261 	if (tab->einj != NULL) {
262 		AcpiPutTable(&tab->einj->Header);
263 		tab->einj = NULL;
264 	}
265 	if (tab->erst != NULL) {
266 		AcpiPutTable(&tab->erst->Header);
267 		tab->erst = NULL;
268 	}
269 	if (tab->hest != NULL) {
270 		AcpiPutTable(&tab->hest->Header);
271 		tab->hest = NULL;
272 	}
273 }
274 
275 /*
276  * apei_identify(sc, name, header)
277  *
278  *	Identify the APEI-related table header for dmesg.
279  */
280 static void
281 apei_identify(struct apei_softc *sc, const char *name,
282     const ACPI_TABLE_HEADER *h)
283 {
284 
285 	aprint_normal_dev(sc->sc_dev, "%s:"
286 	    " OemId <%6.6s,%8.8s,%08x>"
287 	    " AslId <%4.4s,%08x>\n",
288 	    name,
289 	    h->OemId, h->OemTableId, h->OemRevision,
290 	    h->AslCompilerId, h->AslCompilerRevision);
291 }
292 
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Decode a Common Platform Error Record UUID/GUID from an ACPI
 *	table at buf into a sys/uuid.h struct uuid.
 *
 *	buf must point to at least 16 octets.  The decoding is
 *	delegated to uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
305 
306 /*
307  * apei_format_guid(uuid, s)
308  *
309  *	Format a UUID as a string.  This uses C initializer notation,
310  *	not UUID notation, in order to match the text in the UEFI
311  *	specification.
312  */
313 static void
314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
315 {
316 
317 	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
318 	    "{0x%02x,%02x,"
319 	    "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
320 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
321 	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
322 	    uuid->node[0], uuid->node[1], uuid->node[2],
323 	    uuid->node[3], uuid->node[4], uuid->node[5]);
324 }
325 
/*
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 */

/*
 * cper_memory_error_type
 *
 *	Names for the MemoryErrorType field of a memory error section,
 *	indexed by the field's value.  Generated from the
 *	CPER_MEMORY_ERROR_TYPES list: each entry (LN, SN, V) stores the
 *	stringified SN at index LN.  Indices not named by the list are
 *	NULL, which the consumer checks for.
 */
static const char *const cper_memory_error_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_MEMORY_ERROR_TYPES(F)
#undef	F
};
335 
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this
 * but are designated as being intended for Generic Error Data Entries
 * rather than Generic Error Status Blocks.
 *
 * Names for Generic Error Status Block severities, indexed by the
 * numeric severity value.
 */
static const char *const apei_gesb_severity[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "none",
};
349 
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
 *
 * Names for Generic Error Data Entry severities, indexed by the
 * acpica ACPI_HEST_GEN_ERROR_* constants.  Used to annotate the
 * ErrorSeverity field when reporting an entry's header.
 */
static const char *const apei_gede_severity[] = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
	[ACPI_HEST_GEN_ERROR_NONE] = "none",
};
359 
/*
 * N.2.5. Memory Error Section
 *
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Section type UUID that identifies a memory error section; matched
 * against the decoded SectionType of a Generic Error Data Entry.
 */
static const struct uuid CPER_MEMORY_ERROR_SECTION =
    {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
367 
/*
 * apei_cper_memory_error_report(sc, buf, len, ctx, ratelimitok)
 *
 *	Print the fields of the CPER memory error section at buf to the
 *	console, one line per field, each prefixed with ctx.  Only
 *	fields whose bit is set in ValidationBits are printed.  Nothing
 *	is printed if ratelimitok is false.
 *
 *	len is not examined here; the apei_cper_reports table records
 *	sizeof(struct cper_memory_error) as the minimum length for this
 *	section type, presumably enforced by the caller -- confirm.
 */
static void
apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
    size_t len, const char *ctx, bool ratelimitok)
{
	const struct cper_memory_error *ME = buf;
	char bitbuf[1024];	/* scratch space for snprintb output */

	/*
	 * If we've hit the rate limit, skip printing the error.
	 */
	if (!ratelimitok)
		goto out;

	/* The raw validation bits are only shown at debug level. */
	snprintb(bitbuf, sizeof(bitbuf),
	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
		/*
		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
		 */
		/* XXX define this format somewhere */
		/*
		 * Bits 8-15 hold an enumerated ErrorType; bits 16-22
		 * are individual flags.
		 */
		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
		    "f\010\010"	"ErrorType\0"
			"=\001"		"ERR_INTERNAL\0"
			"=\004"		"ERR_MEM\0"
			"=\005"		"ERR_TLB\0"
			"=\006"		"ERR_CACHE\0"
			"=\007"		"ERR_FUNCTION\0"
			"=\010"		"ERR_SELFTEST\0"
			"=\011"		"ERR_FLOW\0"
			"=\020"		"ERR_BUS\0"
			"=\021"		"ERR_MAP\0"
			"=\022"		"ERR_IMPROPER\0"
			"=\023"		"ERR_UNIMPL\0"
			"=\024"		"ERR_LOL\0"
			"=\025"		"ERR_RESPONSE\0"
			"=\026"		"ERR_PARITY\0"
			"=\027"		"ERR_PROTOCOL\0"
			"=\030"		"ERR_ERROR\0"
			"=\031"		"ERR_TIMEOUT\0"
			"=\032"		"ERR_POISONED\0"
		    "b\020"	"AddressError\0"
		    "b\021"	"ControlError\0"
		    "b\022"	"DataError\0"
		    "b\023"	"ResponderDetected\0"
		    "b\024"	"RequesterDetected\0"
		    "b\025"	"FirstError\0"
		    "b\026"	"Overflow\0"
		    "\0", ME->ErrorStatus);
		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
		    ctx, ME->PhysicalAddress);
	}
	if (ME->ValidationBits &
	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
		    "\n", ctx, ME->PhysicalAddressMask);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
		    ME->Node);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
		    ME->Card);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
		    ME->Module);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
		    ME->Bank);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
		    ME->Device);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
		    ME->Row);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
		    ME->Column);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
		    ctx, ME->BitPosition);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
		    ctx, ME->RequestorId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
		    ctx, ME->ResponderId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
		    ctx, ME->TargetId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
		/*
		 * Look up a symbolic name; n is NULL both for values
		 * past the end of the table and for gaps within it.
		 */
		const uint8_t t = ME->MemoryErrorType;
		const char *n = t < __arraycount(cper_memory_error_type)
		    ? cper_memory_error_type[t] : NULL;

		if (n) {
			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
			    " (%s)\n", ctx, t, n);
		} else {
			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
			    ctx, t);
		}
	}

out:	/*
	 * XXX pass this through to uvm(9) or userland for decisions
	 * like page retirement
	 */
	return;
}
492 
/*
 * N.2.7. PCI Express Error Section
 *
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
 *
 * Section type UUID that identifies a PCI Express error section;
 * matched against the decoded SectionType of a Generic Error Data
 * Entry.
 */
static const struct uuid CPER_PCIE_ERROR_SECTION =
    {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
500 
/*
 * Names for the PortType field of a PCIe error section, indexed by the
 * field's value.  Generated from the CPER_PCIE_ERROR_PORT_TYPES list:
 * each entry (LN, SN, V) stores the stringified SN at index LN.
 * Indices not named by the list are NULL.
 */
static const char *const cper_pcie_error_port_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_PCIE_ERROR_PORT_TYPES(F)
#undef	F
};
506 
507 static void
508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
509     const char *ctx, bool ratelimitok)
510 {
511 	const struct cper_pcie_error *PE = buf;
512 	char bitbuf[1024];
513 
514 	/*
515 	 * If we've hit the rate limit, skip printing the error.
516 	 */
517 	if (!ratelimitok)
518 		goto out;
519 
520 	snprintb(bitbuf, sizeof(bitbuf),
521 	    CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
522 	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
523 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
524 		const uint32_t t = PE->PortType;
525 		const char *n = t < __arraycount(cper_pcie_error_port_type)
526 		    ? cper_pcie_error_port_type[t] : NULL;
527 
528 		if (n) {
529 			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
530 			    " (%s)\n", ctx, t, n);
531 		} else {
532 			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
533 			    ctx, t);
534 		}
535 	}
536 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
537 		/* XXX BCD */
538 		device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
539 		    ctx, PE->Version);
540 	}
541 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
542 		/* XXX move me to pcireg.h */
543 		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
544 			/* command */
545 		    "b\000"	"IO_ENABLE\0"
546 		    "b\001"	"MEM_ENABLE\0"
547 		    "b\002"	"MASTER_ENABLE\0"
548 		    "b\003"	"SPECIAL_ENABLE\0"
549 		    "b\004"	"INVALIDATE_ENABLE\0"
550 		    "b\005"	"PALETTE_ENABLE\0"
551 		    "b\006"	"PARITY_ENABLE\0"
552 		    "b\007"	"STEPPING_ENABLE\0"
553 		    "b\010"	"SERR_ENABLE\0"
554 		    "b\011"	"BACKTOBACK_ENABLE\0"
555 		    "b\012"	"INTERRUPT_DISABLE\0"
556 			/* status */
557 		    "b\023"	"INT_STATUS\0"
558 		    "b\024"	"CAPLIST_SUPPORT\0"
559 		    "b\025"	"66MHZ_SUPPORT\0"
560 		    "b\026"	"UDF_SUPPORT\0"
561 		    "b\027"	"BACKTOBACK_SUPPORT\0"
562 		    "b\030"	"PARITY_ERROR\0"
563 		    "f\031\002"	"DEVSEL\0"
564 			"=\000"		"FAST\0"
565 			"=\001"		"MEDIUM\0"
566 			"=\002"		"SLOW\0"
567 		    "b\033"	"TARGET_TARGET_ABORT\0"
568 		    "b\034"	"MASTER_TARGET_ABORT\0"
569 		    "b\035"	"MASTER_ABORT\0"
570 		    "b\036"	"SPECIAL_ERROR\0"
571 		    "b\037"	"PARITY_DETECT\0"
572 		    "\0", PE->CommandStatus);
573 		device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
574 		    ctx, bitbuf);
575 	}
576 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
577 		device_printf(sc->sc_dev, "%s: DeviceID:"
578 		    " VendorID=0x%04"PRIx16
579 		    " DeviceID=0x%04"PRIx16
580 		    " ClassCode=0x%06"PRIx32
581 		    " Function=%"PRIu8
582 		    " Device=%"PRIu8
583 		    " Segment=%"PRIu16
584 		    " Bus=%"PRIu8
585 		    " SecondaryBus=%"PRIu8
586 		    " Slot=0x%04"PRIx16
587 		    " Reserved0=0x%02"PRIx8
588 		    "\n",
589 		    ctx,
590 		    le16dec(PE->DeviceID.VendorID),
591 		    le16dec(PE->DeviceID.DeviceID),
592 		    (PE->DeviceID.ClassCode[0] |	/* le24dec */
593 			((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
594 			((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
595 		    PE->DeviceID.Function, PE->DeviceID.Device,
596 		    le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
597 		    PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
598 		    PE->DeviceID.Reserved0);
599 	}
600 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
601 		device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
602 		    ctx, PE->DeviceSerial);
603 	}
604 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
605 		/* XXX snprintb */
606 		device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
607 		    "\n", ctx, PE->BridgeControlStatus);
608 	}
609 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
610 		uint32_t dcsr, dsr;
611 		char hex[9*sizeof(PE->CapabilityStructure)/4];
612 		unsigned i;
613 
614 		/*
615 		 * Display a hex dump of each 32-bit register in the
616 		 * PCIe capability structure.
617 		 */
618 		__CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0);
619 		for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) {
620 			snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
621 			    le32dec(&PE->CapabilityStructure[4*i]));
622 		}
623 		hex[sizeof(hex) - 1] = '\0';
624 		device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
625 		    ctx, hex);
626 
627 		/*
628 		 * If the Device Status Register has any bits set,
629 		 * highlight it in particular -- these are probably
630 		 * error bits.
631 		 */
632 		dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
633 		dsr = __SHIFTOUT(dcsr, __BITS(31,16));
634 		if (dsr != 0) {
635 			/*
636 			 * XXX move me to pcireg.h; note: high
637 			 * half of DCSR
638 			 */
639 			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
640 			    "b\000"	"CORRECTABLE_ERROR\0"
641 			    "b\001"	"NONFATAL_UNCORRECTABLE_ERROR\0"
642 			    "b\002"	"FATAL_ERROR\0"
643 			    "b\003"	"UNSUPPORTED_REQUEST\0"
644 			    "b\004"	"AUX_POWER\0"
645 			    "b\005"	"TRANSACTIONS_PENDING\0"
646 			    "\0", dsr);
647 			device_printf(sc->sc_dev, "%s: PCIe Device Status:"
648 			    " %s\n",
649 			    ctx, bitbuf);
650 		}
651 	}
652 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
653 		uint32_t uc_status, uc_sev;
654 		uint32_t cor_status;
655 		uint32_t control;
656 		char hex[9*sizeof(PE->AERInfo)/4];
657 		unsigned i;
658 
659 		/*
660 		 * Display a hex dump of each 32-bit register in the
661 		 * PCIe Advanced Error Reporting extended capability
662 		 * structure.
663 		 */
664 		__CTASSERT(sizeof(PE->AERInfo) % 4 == 0);
665 		for (i = 0; i < sizeof(PE->AERInfo)/4; i++) {
666 			snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
667 			    le32dec(&PE->AERInfo[4*i]));
668 		}
669 		hex[sizeof(hex) - 1] = '\0';
670 		device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
671 
672 			/* XXX move me to pcireg.h */
673 #define	PCI_AER_UC_STATUS_FMT	"\177\020"				      \
674 	"b\000"	"UNDEFINED\0"						      \
675 	"b\004"	"DL_PROTOCOL_ERROR\0"					      \
676 	"b\005"	"SURPRISE_DOWN_ERROR\0"					      \
677 	"b\014"	"POISONED_TLP\0"					      \
678 	"b\015"	"FC_PROTOCOL_ERROR\0"					      \
679 	"b\016"	"COMPLETION_TIMEOUT\0"					      \
680 	"b\017"	"COMPLETION_ABORT\0"					      \
681 	"b\020"	"UNEXPECTED_COMPLETION\0"				      \
682 	"b\021"	"RECEIVER_OVERFLOW\0"					      \
683 	"b\022"	"MALFORMED_TLP\0"					      \
684 	"b\023"	"ECRC_ERROR\0"						      \
685 	"b\024"	"UNSUPPORTED_REQUEST_ERROR\0"				      \
686 	"b\025"	"ACS_VIOLATION\0"					      \
687 	"b\026"	"INTERNAL_ERROR\0"					      \
688 	"b\027"	"MC_BLOCKED_TLP\0"					      \
689 	"b\030"	"ATOMIC_OP_EGRESS_BLOCKED\0"				      \
690 	"b\031"	"TLP_PREFIX_BLOCKED_ERROR\0"				      \
691 	"b\032"	"POISONTLP_EGRESS_BLOCKED\0"				      \
692 	"\0"
693 
694 		/*
695 		 * If there are any hardware error status bits set,
696 		 * highlight them in particular, in three groups:
697 		 *
698 		 * - uncorrectable fatal (UC_STATUS and UC_SEVERITY)
699 		 * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY)
700 		 * - corrected (COR_STATUS)
701 		 *
702 		 * And if there are any uncorrectable errors, show
703 		 * which one was reported first, according to
704 		 * CAP_CONTROL.
705 		 */
706 		uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
707 		uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
708 		cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
709 		control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
710 
711 		if (uc_status & uc_sev) {
712 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
713 			    uc_status & uc_sev);
714 			device_printf(sc->sc_dev, "%s:"
715 			    " AER hardware fatal uncorrectable errors: %s\n",
716 			    ctx, bitbuf);
717 		}
718 		if (uc_status & ~uc_sev) {
719 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
720 			    uc_status & ~uc_sev);
721 			device_printf(sc->sc_dev, "%s:"
722 			    " AER hardware non-fatal uncorrectable errors:"
723 			    " %s\n",
724 			    ctx, bitbuf);
725 		}
726 		if (uc_status) {
727 			unsigned first = __SHIFTOUT(control,
728 			    PCI_AER_FIRST_ERROR_PTR);
729 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
730 			    (uint32_t)1 << first);
731 			device_printf(sc->sc_dev, "%s:"
732 			    " AER hardware first uncorrectable error: %s\n",
733 			    ctx, bitbuf);
734 		}
735 		if (cor_status) {
736 			/* XXX move me to pcireg.h */
737 			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
738 			    "b\000"	"RECEIVER_ERROR\0"
739 			    "b\006"	"BAD_TLP\0"
740 			    "b\007"	"BAD_DLLP\0"
741 			    "b\010"	"REPLAY_NUM_ROLLOVER\0"
742 			    "b\014"	"REPLAY_TIMER_TIMEOUT\0"
743 			    "b\015"	"ADVISORY_NF_ERROR\0"
744 			    "b\016"	"INTERNAL_ERROR\0"
745 			    "b\017"	"HEADER_LOG_OVERFLOW\0"
746 			    "\0", cor_status);
747 			device_printf(sc->sc_dev, "%s:"
748 			    " AER hardware corrected error: %s\n",
749 			    ctx, bitbuf);
750 		}
751 	}
752 
753 out:	/*
754 	 * XXX pass this on to the PCI subsystem to handle
755 	 */
756 	return;
757 }
758 
/*
 * apei_cper_reports
 *
 *	Table of known Common Platform Error Record types, symbolic
 *	names, minimum data lengths, and functions to report them.
 *
 *	The section types and corresponding section layouts are listed
 *	at:
 *
 *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
 *
 *	Members:
 *
 *	name		short name printed next to the section type
 *	type		section type UUID to compare against
 *	minlength	size of the structure that func reads
 *	func		report function; arguments are (sc, buf, len,
 *			ctx, ratelimitok)
 */
static const struct apei_cper_report {
	const char *name;
	const struct uuid *type;
	size_t minlength;
	void (*func)(struct apei_softc *, const void *, size_t, const char *,
	    bool);
} apei_cper_reports[] = {
	{ "memory", &CPER_MEMORY_ERROR_SECTION,
	  sizeof(struct cper_memory_error),
	  apei_cper_memory_error_report },
	{ "PCIe", &CPER_PCIE_ERROR_SECTION,
	  sizeof(struct cper_pcie_error),
	  apei_cper_pcie_error_report },
};
784 
785 /*
786  * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
787  *
788  *	Report the header of the ith Generic Error Data Entry in the
789  *	given context, if ratelimitok is true.
790  *
791  *	Return the actual length of the header in headerlen, or 0 if
792  *	not known because the revision isn't recognized.
793  *
794  *	Return the report type in report, or NULL if not known because
795  *	the section type isn't recognized.
796  */
797 static void
798 apei_gede_report_header(struct apei_softc *sc,
799     const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
800     size_t *headerlenp, const struct apei_cper_report **reportp)
801 {
802 	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
803 	struct uuid sectype;
804 	char guidstr[69];
805 	char buf[128];
806 	unsigned i;
807 
808 	/*
809 	 * Print the section type as a C initializer.  It would be
810 	 * prettier to use standard hyphenated UUID notation, but that
811 	 * notation is slightly ambiguous here (two octets could be
812 	 * written either way, depending on Microsoft convention --
813 	 * which influenced ACPI and UEFI -- or internet convention),
814 	 * and the UEFI spec writes the C initializer notation, so this
815 	 * makes it easier to search for.
816 	 *
817 	 * Also print out a symbolic name, if we know it.
818 	 */
819 	apei_cper_guid_dec(gede->SectionType, &sectype);
820 	apei_format_guid(&sectype, guidstr);
821 	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
822 		const struct apei_cper_report *const report =
823 		    &apei_cper_reports[i];
824 
825 		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
826 			continue;
827 		if (ratelimitok) {
828 			device_printf(sc->sc_dev, "%s:"
829 			    " SectionType=%s (%s error)\n",
830 			    ctx, guidstr, report->name);
831 		}
832 		*reportp = report;
833 		break;
834 	}
835 	if (i == __arraycount(apei_cper_reports)) {
836 		if (ratelimitok) {
837 			device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
838 			    guidstr);
839 		}
840 		*reportp = NULL;
841 	}
842 
843 	/*
844 	 * Print the numeric severity and, if we have it, a symbolic
845 	 * name for it.
846 	 */
847 	if (ratelimitok) {
848 		device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
849 		    ctx,
850 		    gede->ErrorSeverity,
851 		    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
852 			? apei_gede_severity[gede->ErrorSeverity]
853 			: "unknown"));
854 	}
855 
856 	/*
857 	 * The Revision may not often be useful, but this is only ever
858 	 * shown at the time of a hardware error report, not something
859 	 * you can glean at your convenience with acpidump.  So print
860 	 * it anyway.
861 	 */
862 	if (ratelimitok) {
863 		device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
864 		    gede->Revision);
865 	}
866 
867 	/*
868 	 * Don't touch anything past the Revision until we've
869 	 * determined we understand it.  Return the header length to
870 	 * the caller, or return zero -- and stop here -- if we don't
871 	 * know what the actual header length is.
872 	 */
873 	if (gede->Revision < 0x0300) {
874 		*headerlenp = sizeof(*gede);
875 	} else if (gede->Revision < 0x0400) {
876 		*headerlenp = sizeof(*gede_v3);
877 	} else {
878 		*headerlenp = 0;
879 		return;
880 	}
881 
882 	/*
883 	 * Print the validation bits at debug level.  Only really
884 	 * helpful if there are bits we _don't_ know about.
885 	 */
886 	if (ratelimitok) {
887 		/* XXX define this format somewhere */
888 		snprintb(buf, sizeof(buf), "\177\020"
889 		    "b\000"	"FRU_ID\0"
890 		    "b\001"	"FRU_TEXT\0" /* `FRU string', sometimes */
891 		    "b\002"	"TIMESTAMP\0"
892 		    "\0", gede->ValidationBits);
893 		aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
894 		    buf);
895 	}
896 
897 	/*
898 	 * Print the CPER section flags.
899 	 */
900 	if (ratelimitok) {
901 		snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
902 		    gede->Flags);
903 		device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
904 	}
905 
906 	/*
907 	 * The ErrorDataLength is unlikely to be useful for the log, so
908 	 * print it at debug level only.
909 	 */
910 	if (ratelimitok) {
911 		aprint_debug_dev(sc->sc_dev, "%s:"
912 		    " ErrorDataLength=0x%"PRIu32"\n",
913 		    ctx, gede->ErrorDataLength);
914 	}
915 
916 	/*
917 	 * Print the FRU Id and text, if available.
918 	 */
919 	if (ratelimitok &&
920 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
921 		struct uuid fruid;
922 
923 		apei_cper_guid_dec(gede->FruId, &fruid);
924 		apei_format_guid(&fruid, guidstr);
925 		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
926 	}
927 	if (ratelimitok &&
928 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
929 		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
930 		    ctx, gede->FruText);
931 	}
932 
933 	/*
934 	 * Print the timestamp, if available by the revision number and
935 	 * the validation bits.
936 	 */
937 	if (ratelimitok &&
938 	    gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
939 	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
940 		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
941 		const uint8_t s = t[0];
942 		const uint8_t m = t[1];
943 		const uint8_t h = t[2];
944 		const uint8_t f = t[3];
945 		const uint8_t D = t[4];
946 		const uint8_t M = t[5];
947 		const uint8_t Y = t[6];
948 		const uint8_t C = t[7];
949 
950 		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
951 		    " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
952 		    ctx, gede_v3->TimeStamp,
953 		    C,Y, M, D, h,m,s,
954 		    f & __BIT(0) ? " (event time)" : " (collect time)");
955 	}
956 }
957 
/*
 * apei_gesb_ratelimit
 *
 *	State to limit the rate of console log messages about hardware
 *	errors.  For each of the four severity levels in a Generic
 *	Error Status Block,
 *
 *	0 - Recoverable (uncorrectable),
 *	1 - Fatal (uncorrectable),
 *	2 - Corrected, and
 *	3 - None (including ill-formed errors),
 *
 *	we record the last time it happened, protected by a CPU simple
 *	lock that we only try-acquire so it is safe to use in any
 *	context, including non-maskable interrupt context.
 *
 *	The array is indexed with the ACPI_HEST_GEN_ERROR_* constants.
 *	Each element is aligned to COHERENCY_UNIT.
 */

static struct {
	__cpu_simple_lock_t	lock;		/* protects lasttime */
	struct timeval		lasttime;	/* time of last occurrence */
	/* NOTE(review): presumably a count of suppressed reports -- confirm */
	volatile uint32_t	suppressed;
} __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
};
985 
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX instead of wrapping around to zero.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		/* Already saturated: leave the counter pinned at the max. */
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
		/* Retry if the counter changed since we loaded it. */
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
998 
999 /*
1000  * apei_gesb_ratecheck(sc, severity, suppressed)
1001  *
1002  *	Check for a rate limit on errors of the specified severity.
1003  *
1004  *	=> Return true if the error should be printed, and format into
1005  *	   the buffer suppressed a message saying how many errors were
1006  *	   previously suppressed.
1007  *
1008  *	=> Return false if the error should be suppressed because the
1009  *	   last one printed was too recent.
1010  */
1011 static bool
1012 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
1013     char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
1014 {
1015 	/* one of each type per minute (XXX worth making configurable?) */
1016 	const struct timeval mininterval = {60, 0};
1017 	unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
1018 	bool ok = false;
1019 
1020 	/*
1021 	 * If the lock is contended, the rate limit is probably
1022 	 * exceeded, so it's not OK to print.
1023 	 *
1024 	 * Otherwise, with the lock held, ask ratecheck(9) whether it's
1025 	 * OK to print.
1026 	 */
1027 	if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
1028 		goto out;
1029 	ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
1030 	__cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
1031 
1032 out:	/*
1033 	 * If it's OK to print, report the number of errors that were
1034 	 * suppressed.  If it's not OK to print, count a suppressed
1035 	 * error.
1036 	 */
1037 	if (ok) {
1038 		const uint32_t n =
1039 		    atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
1040 
1041 		if (n == 0) {
1042 			suppressed[0] = '\0';
1043 		} else {
1044 			snprintf(suppressed,
1045 			    sizeof(" (4294967295 or more errors suppressed)"),
1046 			    " (%u%s error%s suppressed)",
1047 			    n,
1048 			    n == UINT32_MAX ? " or more" : "",
1049 			    n == 1 ? "" : "s");
1050 		}
1051 	} else {
1052 		atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
1053 		suppressed[0] = '\0';
1054 	}
1055 	return ok;
1056 }
1057 
1058 /*
1059  * apei_gesb_report(sc, gesb, size, ctx)
1060  *
1061  *	Check a Generic Error Status Block, of at most the specified
1062  *	size in bytes, and report any errors in it.  Return the 32-bit
1063  *	Block Status in case the caller needs it to acknowledge the
1064  *	report to firmware.
1065  */
1066 uint32_t
1067 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
1068     size_t size, const char *ctx, bool *fatalp)
1069 {
1070 	uint32_t status, unknownstatus, severity, nentries, i;
1071 	uint32_t datalen, rawdatalen;
1072 	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
1073 	const unsigned char *rawdata;
1074 	bool ratelimitok = false;
1075 	char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
1076 	bool fatal = false;
1077 
1078 	/*
1079 	 * Verify the buffer is large enough for a Generic Error Status
1080 	 * Block before we try to touch anything in it.
1081 	 */
1082 	if (size < sizeof(*gesb)) {
1083 		ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
1084 		    suppressed);
1085 		if (ratelimitok) {
1086 			device_printf(sc->sc_dev,
1087 			    "%s: truncated GESB, %zu < %zu%s\n",
1088 			    ctx, size, sizeof(*gesb), suppressed);
1089 		}
1090 		status = 0;
1091 		goto out;
1092 	}
1093 	size -= sizeof(*gesb);
1094 
1095 	/*
1096 	 * Load the status.  Access ordering rules are unclear in the
1097 	 * ACPI specification; I'm guessing that load-acquire of the
1098 	 * block status is a good idea before any other access to the
1099 	 * GESB.
1100 	 */
1101 	status = atomic_load_acquire(&gesb->BlockStatus);
1102 
1103 	/*
1104 	 * If there are no status bits set, the rest of the GESB is
1105 	 * garbage, so stop here.
1106 	 */
1107 	if (status == 0) {
1108 		/* XXX dtrace */
1109 		/* XXX DPRINTF */
1110 		goto out;
1111 	}
1112 
1113 	/*
1114 	 * Read out the severity and get the number of entries in this
1115 	 * status block.
1116 	 */
1117 	severity = gesb->ErrorSeverity;
1118 	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
1119 
1120 	/*
1121 	 * Print a message to the console and dmesg about the severity
1122 	 * of the error.
1123 	 */
1124 	ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
1125 	if (ratelimitok) {
1126 		char statusbuf[128];
1127 
1128 		/* XXX define this format somewhere */
1129 		snprintb(statusbuf, sizeof(statusbuf), "\177\020"
1130 		    "b\000"	"UE\0"
1131 		    "b\001"	"CE\0"
1132 		    "b\002"	"MULTI_UE\0"
1133 		    "b\003"	"MULTI_CE\0"
1134 		    "f\004\010"	"GEDE_COUNT\0"
1135 		    "\0", status);
1136 
1137 		if (severity < __arraycount(apei_gesb_severity)) {
1138 			device_printf(sc->sc_dev, "%s"
1139 			    " reported hardware error%s:"
1140 			    " severity=%s nentries=%u status=%s\n",
1141 			    ctx, suppressed,
1142 			    apei_gesb_severity[severity], nentries, statusbuf);
1143 		} else {
1144 			device_printf(sc->sc_dev, "%s reported error%s:"
1145 			    " severity=%"PRIu32" nentries=%u status=%s\n",
1146 			    ctx, suppressed,
1147 			    severity, nentries, statusbuf);
1148 		}
1149 	}
1150 
1151 	/*
1152 	 * Make a determination about whether the error is fatal.
1153 	 *
1154 	 * XXX Currently we don't have any mechanism to recover from
1155 	 * uncorrectable but recoverable errors, so we treat those --
1156 	 * and anything else we don't recognize -- as fatal.
1157 	 */
1158 	switch (severity) {
1159 	case ACPI_HEST_GEN_ERROR_CORRECTED:
1160 	case ACPI_HEST_GEN_ERROR_NONE:
1161 		fatal = false;
1162 		break;
1163 	case ACPI_HEST_GEN_ERROR_FATAL:
1164 	case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
1165 	default:
1166 		fatal = true;
1167 		break;
1168 	}
1169 
1170 	/*
1171 	 * Clear the bits we know about to warn if there's anything
1172 	 * left we don't understand.
1173 	 */
1174 	unknownstatus = status;
1175 	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
1176 	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
1177 	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
1178 	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
1179 	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
1180 	if (ratelimitok && unknownstatus != 0) {
1181 		/* XXX dtrace */
1182 		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
1183 		    " 0x%"PRIx32"\n", ctx, unknownstatus);
1184 	}
1185 
1186 	/*
1187 	 * Advance past the Generic Error Status Block (GESB) header to
1188 	 * the Generic Error Data Entries (GEDEs).
1189 	 */
1190 	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
1191 
1192 	/*
1193 	 * Verify that the data length (GEDEs) fits within the size.
1194 	 * If not, truncate the GEDEs.
1195 	 */
1196 	datalen = gesb->DataLength;
1197 	if (size < datalen) {
1198 		if (ratelimitok) {
1199 			device_printf(sc->sc_dev, "%s:"
1200 			    " GESB DataLength exceeds bounds:"
1201 			    " %zu < %"PRIu32"\n",
1202 			    ctx, size, datalen);
1203 		}
1204 		datalen = size;
1205 	}
1206 	size -= datalen;
1207 
1208 	/*
1209 	 * Report each of the Generic Error Data Entries.
1210 	 */
1211 	for (i = 0; i < nentries; i++) {
1212 		size_t headerlen;
1213 		const struct apei_cper_report *report;
1214 		char subctx[128];
1215 
1216 		/*
1217 		 * Format a subcontext to show this numbered entry of
1218 		 * the GESB.
1219 		 */
1220 		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
1221 
1222 		/*
1223 		 * If the remaining GESB data length isn't enough for a
1224 		 * GEDE header, stop here.
1225 		 */
1226 		if (datalen < sizeof(*gede)) {
1227 			if (ratelimitok) {
1228 				device_printf(sc->sc_dev, "%s:"
1229 				    " truncated GEDE: %"PRIu32" < %zu bytes\n",
1230 				    subctx, datalen, sizeof(*gede));
1231 			}
1232 			break;
1233 		}
1234 
1235 		/*
1236 		 * Print the GEDE header and get the full length (may
1237 		 * vary from revision to revision of the GEDE) and the
1238 		 * CPER report function if possible.
1239 		 */
1240 		apei_gede_report_header(sc, gede, subctx, ratelimitok,
1241 		    &headerlen, &report);
1242 
1243 		/*
1244 		 * If we don't know the header length because of an
1245 		 * unfamiliar revision, stop here.
1246 		 */
1247 		if (headerlen == 0) {
1248 			if (ratelimitok) {
1249 				device_printf(sc->sc_dev, "%s:"
1250 				    " unknown revision: 0x%"PRIx16"\n",
1251 				    subctx, gede->Revision);
1252 			}
1253 			break;
1254 		}
1255 
1256 		/*
1257 		 * Stop here if what we mapped is too small for the
1258 		 * error data length.
1259 		 */
1260 		datalen -= headerlen;
1261 		if (datalen < gede->ErrorDataLength) {
1262 			if (ratelimitok) {
1263 				device_printf(sc->sc_dev, "%s:"
1264 				    " truncated GEDE payload:"
1265 				    " %"PRIu32" < %"PRIu32" bytes\n",
1266 				    subctx, datalen, gede->ErrorDataLength);
1267 			}
1268 			break;
1269 		}
1270 
1271 		/*
1272 		 * Report the Common Platform Error Record appendix to
1273 		 * this Generic Error Data Entry.
1274 		 */
1275 		if (report == NULL) {
1276 			if (ratelimitok) {
1277 				device_printf(sc->sc_dev, "%s:"
1278 				    " [unknown type]\n", ctx);
1279 			}
1280 		} else {
1281 			/* XXX pass ratelimit through */
1282 			(*report->func)(sc, (const char *)gede + headerlen,
1283 			    gede->ErrorDataLength, subctx, ratelimitok);
1284 		}
1285 
1286 		/*
1287 		 * Advance past the GEDE header and CPER data to the
1288 		 * next GEDE.
1289 		 */
1290 		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1291 		    + headerlen + gede->ErrorDataLength);
1292 	}
1293 
1294 	/*
1295 	 * Advance past the Generic Error Data Entries (GEDEs) to the
1296 	 * raw error data.
1297 	 *
1298 	 * XXX Provide Max Raw Data Length as a parameter, as found in
1299 	 * various HEST entry types.
1300 	 */
1301 	rawdata = (const unsigned char *)gede0 + datalen;
1302 
1303 	/*
1304 	 * Verify that the raw data length fits within the size.  If
1305 	 * not, truncate the raw data.
1306 	 */
1307 	rawdatalen = gesb->RawDataLength;
1308 	if (size < rawdatalen) {
1309 		if (ratelimitok) {
1310 			device_printf(sc->sc_dev, "%s:"
1311 			    " GESB RawDataLength exceeds bounds:"
1312 			    " %zu < %"PRIu32"\n",
1313 			    ctx, size, rawdatalen);
1314 		}
1315 		rawdatalen = size;
1316 	}
1317 	size -= rawdatalen;
1318 
1319 	/*
1320 	 * Hexdump the raw data, if any.
1321 	 */
1322 	if (ratelimitok && rawdatalen > 0) {
1323 		char devctx[128];
1324 
1325 		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1326 		    device_xname(sc->sc_dev), ctx);
1327 		hexdump(printf, devctx, rawdata, rawdatalen);
1328 	}
1329 
1330 	/*
1331 	 * If there's anything left after the raw data, warn.
1332 	 */
1333 	if (ratelimitok && size > 0) {
1334 		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1335 		    ctx, size);
1336 	}
1337 
1338 	/*
1339 	 * Return the status so the caller can ack it, and tell the
1340 	 * caller whether this error is fatal.
1341 	 */
1342 out:	*fatalp = fatal;
1343 	return status;
1344 }
1345 
1346 MODULE(MODULE_CLASS_DRIVER, apei, NULL);
1347 
1348 #ifdef _MODULE
1349 #include "ioconf.c"
1350 #endif
1351 
1352 static int
1353 apei_modcmd(modcmd_t cmd, void *opaque)
1354 {
1355 	int error = 0;
1356 
1357 	switch (cmd) {
1358 	case MODULE_CMD_INIT:
1359 #ifdef _MODULE
1360 		error = config_init_component(cfdriver_ioconf_apei,
1361 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
1362 #endif
1363 		return error;
1364 	case MODULE_CMD_FINI:
1365 #ifdef _MODULE
1366 		error = config_fini_component(cfdriver_ioconf_apei,
1367 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
1368 #endif
1369 		return error;
1370 	default:
1371 		return ENOTTY;
1372 	}
1373 }
1374