xref: /netbsd-src/sys/dev/acpi/apei_hest.c (revision a314b8f31d720d5f80071a443e3c11bd4c66ace0)
1 /*	$NetBSD: apei_hest.c,v 1.7 2025/01/05 22:11:18 andvar Exp $	*/
2 
3 /*-
4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * APEI HEST -- Hardware Error Source Table
31  *
32  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#acpi-error-source
33  *
34  * XXX uncorrectable error NMI comes in on all CPUs at once, what to do?
35  *
36  * XXX AMD MCA
37  *
38  * XXX IA32 machine check stuff
39  *
40  * XXX switch-to-polling for GHES notifications
41  *
42  * XXX error threshold for GHES notifications
43  *
44  * XXX sort out interrupt notification types, e.g. do we ever need to
45  * do acpi_intr_establish?
46  *
47  * XXX sysctl knob to force polling each particular error source that
48  * supports it
49  *
50  * XXX consider a lighter-weight polling schedule for machines with
51  * thousands of polled GHESes
52  */
53 
54 #include <sys/cdefs.h>
55 __KERNEL_RCSID(0, "$NetBSD: apei_hest.c,v 1.7 2025/01/05 22:11:18 andvar Exp $");
56 
57 #include <sys/types.h>
58 
59 #include <sys/atomic.h>
60 #include <sys/kmem.h>
61 #include <sys/lock.h>
62 #include <sys/systm.h>
63 
64 #include <dev/acpi/acpivar.h>
65 #include <dev/acpi/apei_cper.h>
66 #include <dev/acpi/apei_hestvar.h>
67 #include <dev/acpi/apei_hed.h>
68 #include <dev/acpi/apei_mapreg.h>
69 #include <dev/acpi/apeivar.h>
70 
71 #if defined(__i386__) || defined(__x86_64__)
72 #include <x86/nmi.h>
73 #endif
74 
75 #include "ioconf.h"
76 
/*
 * Per-file ACPI boilerplate: selects a component and names this
 * module "apei".  NOTE(review): neither name is referenced directly
 * anywhere else in this file; presumably both are consumed by the
 * ACPICA debug-output macros -- confirm against the ACPICA headers.
 */
#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
ACPI_MODULE_NAME	("apei")
79 
80 /*
81  * apei_hest_ghes_handle(sc, src)
82  *
83  *	Check for, report, and acknowledge any error from a Generic
84  *	Hardware Error Source (GHES, not GHESv2).  Return true if there
85  *	was any error to report, false if not.
86  */
87 static bool
88 apei_hest_ghes_handle(struct apei_softc *sc, struct apei_source *src)
89 {
90 	ACPI_HEST_GENERIC *ghes = container_of(src->as_header,
91 	    ACPI_HEST_GENERIC, Header);
92 	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb;
93 	char ctx[sizeof("error source 65535")];
94 	uint32_t status;
95 	bool fatal = false;
96 
97 	/*
98 	 * Process and report any error.
99 	 */
100 	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
101 	    ghes->Header.SourceId);
102 	status = apei_gesb_report(sc, src->as_ghes.gesb,
103 	    ghes->ErrorBlockLength, ctx, &fatal);
104 
105 	/*
106 	 * Acknowledge the error by clearing the block status.  To
107 	 * avoid races, we probably have to avoid further access to the
108 	 * GESB until we get another notification.
109 	 *
110 	 * As a precaution, we zero this with atomic compare-and-swap
111 	 * so at least we can see if the status changed while we were
112 	 * working on it.
113 	 *
114 	 * It is tempting to clear bits with atomic and-complement, but
115 	 * the BlockStatus is not just a bit mask -- bits [13:4] are a
116 	 * count of Generic Error Data Entries, and who knows what bits
117 	 * [31:14] might be used for in the future.
118 	 *
119 	 * XXX The GHES(v1) protocol is unclear from the specification
120 	 * here.  The GHESv2 protocol has a separate register write to
121 	 * acknowledge, which is a bit clearer.
122 	 */
123 	membar_release();
124 	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
125 	if (status1 != status) {
126 		device_printf(sc->sc_dev, "%s: status changed from"
127 		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
128 		    ctx, status, status1);
129 	}
130 
131 	/*
132 	 * If the error was fatal, panic now.
133 	 */
134 	if (fatal)
135 		panic("fatal hardware error");
136 
137 	return status != 0;
138 }
139 
140 /*
141  * apei_hest_ghes_v2_handle(sc, src)
142  *
143  *	Check for, report, and acknowledge any error from a Generic
144  *	Hardware Error Source v2.  Return true if there was any error
145  *	to report, false if not.
146  */
147 static bool
148 apei_hest_ghes_v2_handle(struct apei_softc *sc, struct apei_source *src)
149 {
150 	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
151 	    ACPI_HEST_GENERIC_V2, Header);
152 	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb;
153 	char ctx[sizeof("error source 65535")];
154 	uint64_t X;
155 	uint32_t status;
156 	bool fatal;
157 
158 	/*
159 	 * Process and report any error.
160 	 */
161 	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
162 	    ghes_v2->Header.SourceId);
163 	status = apei_gesb_report(sc, src->as_ghes.gesb,
164 	    ghes_v2->ErrorBlockLength, ctx, &fatal);
165 
166 	/*
167 	 * First clear the block status.  As a precaution, we zero this
168 	 * with atomic compare-and-swap so at least we can see if the
169 	 * status changed while we were working on it.
170 	 */
171 	membar_release();
172 	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
173 	if (status1 != status) {
174 		device_printf(sc->sc_dev, "%s: status changed from"
175 		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
176 		    ctx, status, status1);
177 	}
178 
179 	/*
180 	 * Next, do the Read Ack dance.
181 	 *
182 	 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
183 	 */
184 	X = apei_mapreg_read(&ghes_v2->ReadAckRegister,
185 	    src->as_ghes_v2.read_ack);
186 	X &= ghes_v2->ReadAckPreserve;
187 	X |= ghes_v2->ReadAckWrite;
188 	apei_mapreg_write(&ghes_v2->ReadAckRegister,
189 	    src->as_ghes_v2.read_ack, X);
190 
191 	/*
192 	 * If the error was fatal, panic now.
193 	 */
194 	if (fatal)
195 		panic("fatal hardware error");
196 
197 	return status != 0;
198 }
199 
200 /*
201  * apei_hest_ghes_poll(cookie)
202  *
203  *	Callout handler for periodic polling of a Generic Hardware
204  *	Error Source (GHES, not GHESv2), using Notification Type `0 -
205  *	Polled'.
206  *
207  *	cookie is the struct apei_source pointer for a single source;
208  *	if there are multiple sources there will be multiple callouts.
209  */
210 static void
211 apei_hest_ghes_poll(void *cookie)
212 {
213 	struct apei_source *src = cookie;
214 	struct apei_softc *sc = src->as_sc;
215 	ACPI_HEST_GENERIC *ghes = container_of(src->as_header,
216 	    ACPI_HEST_GENERIC, Header);
217 
218 	/*
219 	 * Process and acknowledge any error.
220 	 */
221 	(void)apei_hest_ghes_handle(sc, src);
222 
223 	/*
224 	 * Schedule polling again after the firmware-suggested
225 	 * interval.
226 	 */
227 	callout_schedule(&src->as_ch,
228 	    MAX(1, mstohz(ghes->Notify.PollInterval)));
229 }
230 
231 /*
232  * apei_hest_ghes_v2_poll(cookie)
233  *
234  *	Callout handler for periodic polling of a Generic Hardware
235  *	Error Source v2, using Notification Type `0 - Polled'.
236  *
237  *	cookie is the struct apei_source pointer for a single source;
238  *	if there are multiple sources there will be multiple callouts.
239  */
240 static void
241 apei_hest_ghes_v2_poll(void *cookie)
242 {
243 	struct apei_source *src = cookie;
244 	struct apei_softc *sc = src->as_sc;
245 	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
246 	    ACPI_HEST_GENERIC_V2, Header);
247 
248 	/*
249 	 * Process and acknowledge any error.
250 	 */
251 	(void)apei_hest_ghes_v2_handle(sc, src);
252 
253 	/*
254 	 * Schedule polling again after the firmware-suggested
255 	 * interval.
256 	 */
257 	callout_schedule(&src->as_ch,
258 	    MAX(1, mstohz(ghes_v2->Notify.PollInterval)));
259 }
260 
261 #if defined(__i386__) || defined(__x86_64__)
262 
263 /*
264  * The NMI is (sometimes?) delivered to all CPUs at once.  To reduce
265  * confusion, let's try to have only one CPU process error
266  * notifications at a time.
267  */
268 static __cpu_simple_lock_t apei_hest_nmi_lock = __SIMPLELOCK_UNLOCKED;
269 
270 /*
271  * apei_hest_ghes_nmi(tf, cookie)
272  *
273  *	Nonmaskable interrupt handler for Generic Hardware Error
274  *	Sources (GHES, not GHESv2) with Notification Type `4 - NMI'.
275  */
276 static int
277 apei_hest_ghes_nmi(const struct trapframe *tf, void *cookie)
278 {
279 	struct apei_source *src = cookie;
280 	struct apei_softc *sc = src->as_sc;
281 
282 	__cpu_simple_lock(&apei_hest_nmi_lock);
283 	const bool mine = apei_hest_ghes_handle(sc, src);
284 	__cpu_simple_unlock(&apei_hest_nmi_lock);
285 
286 	/*
287 	 * Tell the NMI subsystem whether this interrupt could have
288 	 * been for us or not.
289 	 */
290 	return mine;
291 }
292 
293 /*
294  * apei_hest_ghes_v2_nmi(tf, cookie)
295  *
296  *	Nonmaskable interrupt handler for Generic Hardware Error
297  *	Sources v2 with Notification Type `4 - NMI'.
298  */
299 static int
300 apei_hest_ghes_v2_nmi(const struct trapframe *tf, void *cookie)
301 {
302 	struct apei_source *src = cookie;
303 	struct apei_softc *sc = src->as_sc;
304 
305 	__cpu_simple_lock(&apei_hest_nmi_lock);
306 	const bool mine = apei_hest_ghes_v2_handle(sc, src);
307 	__cpu_simple_unlock(&apei_hest_nmi_lock);
308 
309 	/*
310 	 * Tell the NMI subsystem whether this interrupt could have
311 	 * been for us or not.
312 	 */
313 	return mine;
314 }
315 
316 #endif	/* defined(__i386__) || defined(__x86_64__) */
317 
318 /*
319  * apei_hest_attach_ghes(sc, ghes, i)
320  *
321  *	Attach a Generic Hardware Error Source (GHES, not GHESv2) as
322  *	the ith source in the Hardware Error Source Table.
323  *
324  *	After this point, the system will check for and handle errors
325  *	when notified by this source.
326  */
327 static void
328 apei_hest_attach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
329     uint32_t i)
330 {
331 	struct apei_hest_softc *hsc = &sc->sc_hest;
332 	struct apei_source *src = &hsc->hsc_source[i];
333 	uint64_t addr;
334 	ACPI_STATUS rv;
335 	char ctx[sizeof("HEST[4294967295, Id=65535]")];
336 
337 	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
338 	    i, ghes->Header.SourceId);
339 
340 	/*
341 	 * Verify the source is enabled before proceeding.  The Enabled
342 	 * field is 8 bits with 256 possibilities, but only two of the
343 	 * possibilities, 0 and 1, have semantics defined in the spec,
344 	 * so out of an abundance of caution let's tread carefully in
345 	 * case anything changes and noisily reject any values other
346 	 * than 1.
347 	 */
348 	switch (ghes->Enabled) {
349 	case 1:
350 		break;
351 	case 0:
352 		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
353 		return;
354 	default:
355 		aprint_error_dev(sc->sc_dev, "%s: unknown GHES Enabled state:"
356 		    " 0x%"PRIx8"\n", ctx, ghes->Enabled);
357 		return;
358 	}
359 
360 	/*
361 	 * Verify the Error Status Address bit width is at most 64 bits
362 	 * before proceeding with this source.  When we get 128-bit
363 	 * addressing, this code will have to be updated.
364 	 */
365 	if (ghes->ErrorStatusAddress.BitWidth > 64) {
366 		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
367 		    " %"PRIu8"\n", ctx, ghes->ErrorStatusAddress.BitWidth);
368 		return;
369 	}
370 
371 	/*
372 	 * Read the GHES Error Status Address.  This is the physical
373 	 * address of a GESB, Generic Error Status Block.  Why the
374 	 * physical address is exposed via this indirection, and not
375 	 * simply stored directly in the GHES, is unclear to me.
376 	 * Hoping it's not because the address can change dynamically,
377 	 * because the error handling path shouldn't involve mapping
378 	 * anything.
379 	 */
380 	rv = AcpiRead(&addr, &ghes->ErrorStatusAddress);
381 	if (ACPI_FAILURE(rv)) {
382 		aprint_error_dev(sc->sc_dev, "%s:"
383 		    " failed to read error status address: %s", ctx,
384 		    AcpiFormatException(rv));
385 		return;
386 	}
387 	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
388 	    addr);
389 
390 	/*
391 	 * Initialize the source and map the GESB so we can get at it
392 	 * in the error handling path.
393 	 */
394 	src->as_sc = sc;
395 	src->as_header = &ghes->Header;
396 	src->as_ghes.gesb = AcpiOsMapMemory(addr, ghes->ErrorBlockLength);
397 
398 	/*
399 	 * Arrange to receive notifications.
400 	 */
401 	switch (ghes->Notify.Type) {
402 	case ACPI_HEST_NOTIFY_POLLED:
403 		if (ghes->Notify.PollInterval == 0) /* paranoia */
404 			break;
405 		callout_init(&src->as_ch, CALLOUT_MPSAFE);
406 		callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
407 		callout_schedule(&src->as_ch, 0);
408 		break;
409 	case ACPI_HEST_NOTIFY_SCI:
410 	case ACPI_HEST_NOTIFY_GPIO:
411 		/*
412 		 * SCI and GPIO notifications are delivered through
413 		 * Hardware Error Device (PNP0C33) events.
414 		 *
415 		 * XXX Where is this spelled out?  The text at
416 		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
417 		 * is vague.
418 		 */
419 		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
420 		break;
421 #if defined(__i386__) || defined(__x86_64__)
422 	case ACPI_HEST_NOTIFY_NMI:
423 		src->as_nmi = nmi_establish(&apei_hest_ghes_nmi, src);
424 		break;
425 #endif
426 	}
427 
428 	/*
429 	 * Now that we have notification set up, process and
430 	 * acknowledge the initial GESB report if any.
431 	 */
432 	apei_hest_ghes_handle(sc, src);
433 }
434 
435 /*
436  * apei_hest_detach_ghes(sc, ghes, i)
437  *
438  *	Detach the ith source, which is a Generic Hardware Error Source
439  *	(GHES, not GHESv2).
440  *
441  *	After this point, the system will ignore notifications from
442  *	this source.
443  */
444 static void
445 apei_hest_detach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
446     uint32_t i)
447 {
448 	struct apei_hest_softc *hsc = &sc->sc_hest;
449 	struct apei_source *src = &hsc->hsc_source[i];
450 
451 	/*
452 	 * Arrange to stop receiving notifications.
453 	 */
454 	switch (ghes->Notify.Type) {
455 	case ACPI_HEST_NOTIFY_POLLED:
456 		if (ghes->Notify.PollInterval == 0) /* paranoia */
457 			break;
458 		callout_halt(&src->as_ch, NULL);
459 		callout_destroy(&src->as_ch);
460 		break;
461 	case ACPI_HEST_NOTIFY_SCI:
462 	case ACPI_HEST_NOTIFY_GPIO:
463 		/*
464 		 * No need to spend time removing the entry; no further
465 		 * calls via apei_hed_notify are possible at this
466 		 * point, now that detach has begun.
467 		 */
468 		break;
469 #if defined(__i386__) || defined(__x86_64__)
470 	case ACPI_HEST_NOTIFY_NMI:
471 		nmi_disestablish(src->as_nmi);
472 		src->as_nmi = NULL;
473 		break;
474 #endif
475 	}
476 
477 	/*
478 	 * No more notifications.  Unmap the GESB and destroy the
479 	 * interrupt source now that it will no longer be used in
480 	 * error handling path.
481 	 */
482 	AcpiOsUnmapMemory(src->as_ghes.gesb, ghes->ErrorBlockLength);
483 	src->as_ghes.gesb = NULL;
484 	src->as_header = NULL;
485 	src->as_sc = NULL;
486 }
487 
488 
489 /*
490  * apei_hest_attach_ghes_v2(sc, ghes_v2, i)
491  *
492  *	Attach a Generic Hardware Error Source v2 as the ith source in
493  *	the Hardware Error Source Table.
494  *
495  *	After this point, the system will check for and handle errors
496  *	when notified by this source.
497  */
498 static void
499 apei_hest_attach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
500     uint32_t i)
501 {
502 	struct apei_hest_softc *hsc = &sc->sc_hest;
503 	struct apei_source *src = &hsc->hsc_source[i];
504 	uint64_t addr;
505 	struct apei_mapreg *read_ack;
506 	ACPI_STATUS rv;
507 	char ctx[sizeof("HEST[4294967295, Id=65535]")];
508 
509 	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
510 	    i, ghes_v2->Header.SourceId);
511 
512 	/*
513 	 * Verify the source is enabled before proceeding.  The Enabled
514 	 * field is 8 bits with 256 possibilities, but only two of the
515 	 * possibilities, 0 and 1, have semantics defined in the spec,
516 	 * so out of an abundance of caution let's tread carefully in
517 	 * case anything changes and noisily reject any values other
518 	 * than 1.
519 	 */
520 	switch (ghes_v2->Enabled) {
521 	case 1:
522 		break;
523 	case 0:
524 		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
525 		return;
526 	default:
527 		aprint_error_dev(sc->sc_dev, "%s:"
528 		    " unknown GHESv2 Enabled state: 0x%"PRIx8"\n", ctx,
529 		    ghes_v2->Enabled);
530 		return;
531 	}
532 
533 	/*
534 	 * Verify the Error Status Address bit width is at most 64 bits
535 	 * before proceeding with this source.  When we get 128-bit
536 	 * addressing, this code will have to be updated.
537 	 */
538 	if (ghes_v2->ErrorStatusAddress.BitWidth > 64) {
539 		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
540 		    " %"PRIu8"\n", ctx, ghes_v2->ErrorStatusAddress.BitWidth);
541 		return;
542 	}
543 
544 	/*
545 	 * Read the GHESv2 Error Status Address.  This is the physical
546 	 * address of a GESB, Generic Error Status Block.  Why the
547 	 * physical address is exposed via this indirection, and not
548 	 * simply stored directly in the GHESv2, is unclear to me.
549 	 * Hoping it's not because the address can change dynamically,
550 	 * because the error handling path shouldn't involve mapping
551 	 * anything.
552 	 */
553 	rv = AcpiRead(&addr, &ghes_v2->ErrorStatusAddress);
554 	if (ACPI_FAILURE(rv)) {
555 		aprint_error_dev(sc->sc_dev, "%s:"
556 		    " failed to read error status address: %s", ctx,
557 		    AcpiFormatException(rv));
558 		return;
559 	}
560 	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
561 	    addr);
562 
563 	/*
564 	 * Try to map the Read Ack register up front, so we don't have
565 	 * to allocate and free kva in AcpiRead/AcpiWrite at the time
566 	 * we're handling an error.  Bail if we can't.
567 	 */
568 	read_ack = apei_mapreg_map(&ghes_v2->ReadAckRegister);
569 	if (read_ack == NULL) {
570 		aprint_error_dev(sc->sc_dev, "%s:"
571 		    " unable to map Read Ack register\n", ctx);
572 		return;
573 	}
574 
575 	/*
576 	 * Initialize the source and map the GESB it in the error
577 	 * handling path.
578 	 */
579 	src->as_sc = sc;
580 	src->as_header = &ghes_v2->Header;
581 	src->as_ghes_v2.gesb = AcpiOsMapMemory(addr,
582 	    ghes_v2->ErrorBlockLength);
583 	src->as_ghes_v2.read_ack = read_ack;
584 
585 	/*
586 	 * Arrange to receive notifications.
587 	 */
588 	switch (ghes_v2->Notify.Type) {
589 	case ACPI_HEST_NOTIFY_POLLED:
590 		if (ghes_v2->Notify.PollInterval == 0) /* paranoia */
591 			break;
592 		callout_init(&src->as_ch, CALLOUT_MPSAFE);
593 		callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
594 		callout_schedule(&src->as_ch, 0);
595 		break;
596 	case ACPI_HEST_NOTIFY_SCI:
597 	case ACPI_HEST_NOTIFY_GPIO:
598 		/*
599 		 * SCI and GPIO notifications are delivered through
600 		 * Hardware Error Device (PNP0C33) events.
601 		 *
602 		 * XXX Where is this spelled out?  The text at
603 		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
604 		 * is vague.
605 		 */
606 		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
607 		break;
608 #if defined(__i386__) || defined(__x86_64__)
609 	case ACPI_HEST_NOTIFY_NMI:
610 		src->as_nmi = nmi_establish(&apei_hest_ghes_v2_nmi, src);
611 		break;
612 #endif
613 	}
614 
615 	/*
616 	 * Now that we have notification set up, process and
617 	 * acknowledge the initial GESB report if any.
618 	 */
619 	apei_hest_ghes_handle(sc, src);
620 }
621 
622 /*
623  * apei_hest_detach_ghes_v2(sc, ghes_v2, i)
624  *
625  *	Detach the ith source, which is a Generic Hardware Error Source
626  *	v2.
627  *
628  *	After this point, the system will ignore notifications from
629  *	this source.
630  */
631 static void
632 apei_hest_detach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
633     uint32_t i)
634 {
635 	struct apei_hest_softc *hsc = &sc->sc_hest;
636 	struct apei_source *src = &hsc->hsc_source[i];
637 
638 	/*
639 	 * Arrange to stop receiving notifications.
640 	 */
641 	switch (ghes_v2->Notify.Type) {
642 	case ACPI_HEST_NOTIFY_POLLED:
643 		if (ghes_v2->Notify.PollInterval == 0) /* paranoia */
644 			break;
645 		callout_halt(&src->as_ch, NULL);
646 		callout_destroy(&src->as_ch);
647 		break;
648 	case ACPI_HEST_NOTIFY_SCI:
649 	case ACPI_HEST_NOTIFY_GPIO:
650 		/*
651 		 * No need to spend time removing the entry; no further
652 		 * calls via apei_hed_notify are possible at this
653 		 * point, now that detach has begun.
654 		 */
655 		break;
656 #if defined(__i386__) || defined(__x86_64__)
657 	case ACPI_HEST_NOTIFY_NMI:
658 		nmi_disestablish(src->as_nmi);
659 		src->as_nmi = NULL;
660 		break;
661 #endif
662 	}
663 
664 	/*
665 	 * No more notifications.  Unmap the GESB and read ack register
666 	 * now that it will no longer be used in error handling path.
667 	 */
668 	AcpiOsUnmapMemory(src->as_ghes_v2.gesb, ghes_v2->ErrorBlockLength);
669 	src->as_ghes_v2.gesb = NULL;
670 	apei_mapreg_unmap(&ghes_v2->ReadAckRegister, src->as_ghes_v2.read_ack);
671 	src->as_ghes_v2.read_ack = NULL;
672 	src->as_header = NULL;
673 	src->as_sc = NULL;
674 }
675 
676 /*
677  * apei_hest_attach_source(sc, header, i, size_t maxlen)
678  *
679  *	Attach the ith source in the Hardware Error Source Table given
680  *	its header, and return a pointer to the header of the next
681  *	source in the table, provided it is no more than maxlen bytes
682  *	past header.  Return NULL if the size of the source is unknown
683  *	or would exceed maxlen bytes.
684  */
685 static ACPI_HEST_HEADER *
686 apei_hest_attach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header,
687     uint32_t i, size_t maxlen)
688 {
689 	char ctx[sizeof("HEST[4294967295, Id=65535]")];
690 
691 	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
692 	    i, header->SourceId);
693 
694 	switch (header->Type) {
695 	case ACPI_HEST_TYPE_IA32_CHECK: {
696 		ACPI_HEST_IA_MACHINE_CHECK *const imc = container_of(header,
697 		    ACPI_HEST_IA_MACHINE_CHECK, Header);
698 
699 		aprint_error_dev(sc->sc_dev, "%s:"
700 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
701 
702 		if (maxlen < sizeof(*imc))
703 			return NULL;
704 		maxlen -= sizeof(*imc);
705 		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imc + 1);
706 		if (maxlen < imc->NumHardwareBanks*sizeof(*bank))
707 			return NULL;
708 		return (ACPI_HEST_HEADER *)(bank + imc->NumHardwareBanks);
709 	}
710 	case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: {
711 		ACPI_HEST_IA_CORRECTED *const imcc = container_of(header,
712 		    ACPI_HEST_IA_CORRECTED, Header);
713 
714 		aprint_error_dev(sc->sc_dev, "%s:"
715 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
716 
717 		if (maxlen < sizeof(*imcc))
718 			return NULL;
719 		maxlen -= sizeof(*imcc);
720 		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imcc + 1);
721 		if (maxlen < imcc->NumHardwareBanks*sizeof(*bank))
722 			return NULL;
723 		return (ACPI_HEST_HEADER *)(bank + imcc->NumHardwareBanks);
724 	}
725 	case ACPI_HEST_TYPE_IA32_NMI: {
726 		ACPI_HEST_IA_NMI *const ianmi = container_of(header,
727 		    ACPI_HEST_IA_NMI, Header);
728 
729 		aprint_error_dev(sc->sc_dev, "%s:"
730 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
731 
732 		if (maxlen < sizeof(*ianmi))
733 			return NULL;
734 		return (ACPI_HEST_HEADER *)(ianmi + 1);
735 	}
736 	case ACPI_HEST_TYPE_AER_ROOT_PORT: {
737 		ACPI_HEST_AER_ROOT *const aerroot = container_of(header,
738 		    ACPI_HEST_AER_ROOT, Header);
739 
740 		aprint_error_dev(sc->sc_dev, "%s:"
741 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
742 
743 		if (maxlen < sizeof(*aerroot))
744 			return NULL;
745 		return (ACPI_HEST_HEADER *)(aerroot + 1);
746 	}
747 	case ACPI_HEST_TYPE_AER_ENDPOINT: {
748 		ACPI_HEST_AER *const aer = container_of(header,
749 		    ACPI_HEST_AER, Header);
750 
751 		aprint_error_dev(sc->sc_dev, "%s:"
752 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
753 
754 		if (maxlen < sizeof(*aer))
755 			return NULL;
756 		return (ACPI_HEST_HEADER *)(aer + 1);
757 	}
758 	case ACPI_HEST_TYPE_AER_BRIDGE: {
759 		ACPI_HEST_AER_BRIDGE *const aerbridge = container_of(header,
760 		    ACPI_HEST_AER_BRIDGE, Header);
761 
762 		aprint_error_dev(sc->sc_dev, "%s:"
763 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
764 
765 		if (maxlen < sizeof(*aerbridge))
766 			return NULL;
767 		return (ACPI_HEST_HEADER *)(aerbridge + 1);
768 	}
769 	case ACPI_HEST_TYPE_GENERIC_ERROR: {
770 		ACPI_HEST_GENERIC *const ghes = container_of(header,
771 		    ACPI_HEST_GENERIC, Header);
772 
773 		if (maxlen < sizeof(*ghes))
774 			return NULL;
775 		apei_hest_attach_ghes(sc, ghes, i);
776 		return (ACPI_HEST_HEADER *)(ghes + 1);
777 	}
778 	case ACPI_HEST_TYPE_GENERIC_ERROR_V2: {
779 		ACPI_HEST_GENERIC_V2 *const ghes_v2 = container_of(header,
780 		    ACPI_HEST_GENERIC_V2, Header);
781 
782 		if (maxlen < sizeof(*ghes_v2))
783 			return NULL;
784 		apei_hest_attach_ghes_v2(sc, ghes_v2, i);
785 		return (ACPI_HEST_HEADER *)(ghes_v2 + 1);
786 	}
787 	case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: {
788 		ACPI_HEST_IA_DEFERRED_CHECK *const imdc = container_of(header,
789 		    ACPI_HEST_IA_DEFERRED_CHECK, Header);
790 
791 		aprint_error_dev(sc->sc_dev, "%s:"
792 		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
793 
794 		if (maxlen < sizeof(*imdc))
795 			return NULL;
796 		maxlen -= sizeof(*imdc);
797 		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imdc + 1);
798 		if (maxlen < imdc->NumHardwareBanks*sizeof(*bank))
799 			return NULL;
800 		return (ACPI_HEST_HEADER *)(bank + imdc->NumHardwareBanks);
801 	}
802 	case ACPI_HEST_TYPE_NOT_USED3:
803 	case ACPI_HEST_TYPE_NOT_USED4:
804 	case ACPI_HEST_TYPE_NOT_USED5:
805 	default:
806 		aprint_error_dev(sc->sc_dev, "%s: unknown type:"
807 		    " 0x%04"PRIx16"\n", ctx, header->Type);
808 		if (header->Type >= 12) {
809 			/*
810 			 * `Beginning with error source type 12 and
811 			 *  onward, each Error Source Structure must
812 			 *  use the standard Error Source Structure
813 			 *  Header as defined below.'
814 			 *
815 			 * Not yet in acpica, though, so we copy this
816 			 * down manually.
817 			 */
818 			struct {
819 				UINT16	Type;
820 				UINT16	Length;
821 			} *const essh = (void *)header;
822 
823 			if (maxlen < sizeof(*essh) || maxlen < essh->Length)
824 				return NULL;
825 			return (ACPI_HEST_HEADER *)((char *)header +
826 			    essh->Length);
827 		}
828 		return NULL;
829 	}
830 }
831 
832 /*
833  * apei_hest_detach_source(sc, header, i)
834  *
835  *	Detach the ith source in the Hardware Error Status Table.
836  *	Caller is assumed to have stored where each source's header is,
837  *	so no need to return the pointer to the header of the next
838  *	source in the table.
839  */
840 static void
841 apei_hest_detach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header,
842     uint32_t i)
843 {
844 
845 	switch (header->Type) {
846 	case ACPI_HEST_TYPE_GENERIC_ERROR: {
847 		ACPI_HEST_GENERIC *ghes = container_of(header,
848 		    ACPI_HEST_GENERIC, Header);
849 
850 		apei_hest_detach_ghes(sc, ghes, i);
851 		break;
852 	}
853 	case ACPI_HEST_TYPE_GENERIC_ERROR_V2: {
854 		ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(header,
855 		    ACPI_HEST_GENERIC_V2, Header);
856 
857 		apei_hest_detach_ghes_v2(sc, ghes_v2, i);
858 		break;
859 	}
860 	case ACPI_HEST_TYPE_IA32_CHECK:
861 	case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
862 	case ACPI_HEST_TYPE_IA32_NMI:
863 	case ACPI_HEST_TYPE_NOT_USED3:
864 	case ACPI_HEST_TYPE_NOT_USED4:
865 	case ACPI_HEST_TYPE_NOT_USED5:
866 	case ACPI_HEST_TYPE_AER_ROOT_PORT:
867 	case ACPI_HEST_TYPE_AER_ENDPOINT:
868 	case ACPI_HEST_TYPE_AER_BRIDGE:
869 	case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK:
870 	default:
871 		/* XXX shouldn't happen */
872 		break;
873 	}
874 }
875 
876 /*
877  * apei_hest_attach(sc)
878  *
879  *	Scan the Hardware Error Source Table and attach sources
880  *	enumerated in it so we can receive and process hardware errors
881  *	during operation.
882  */
883 void
884 apei_hest_attach(struct apei_softc *sc)
885 {
886 	ACPI_TABLE_HEST *hest = sc->sc_tab.hest;
887 	struct apei_hest_softc *hsc = &sc->sc_hest;
888 	ACPI_HEST_HEADER *header, *next;
889 	uint32_t i, n;
890 	size_t resid;
891 
892 	/*
893 	 * Initialize the HED (Hardware Error Device, PNP0C33)
894 	 * notification list so apei_hed_notify becomes a noop with no
895 	 * extra effort even if we fail to attach anything.
896 	 */
897 	SIMPLEQ_INIT(&hsc->hsc_hed_list);
898 
899 	/*
900 	 * Verify the table is large enough.
901 	 */
902 	if (hest->Header.Length < sizeof(*hest)) {
903 		aprint_error_dev(sc->sc_dev, "HEST: truncated table:"
904 		    " %"PRIu32" < %zu minimum bytes\n",
905 		    hest->Header.Length, sizeof(*hest));
906 		return;
907 	}
908 
909 	n = hest->ErrorSourceCount;
910 	aprint_normal_dev(sc->sc_dev, "HEST: %"PRIu32
911 	    " hardware error source%s\n", n, n == 1 ? "" : "s");
912 
913 	/*
914 	 * This could be SIZE_MAX but let's put a smaller arbitrary
915 	 * limit on it; if you have gigabytes of HEST something is
916 	 * probably wrong.
917 	 */
918 	if (n > MIN(SIZE_MAX, INT32_MAX)/sizeof(hsc->hsc_source[0])) {
919 		aprint_error_dev(sc->sc_dev, "HEST: too many error sources\n");
920 		return;
921 	}
922 	hsc->hsc_source = kmem_zalloc(n * sizeof(hsc->hsc_source[0]),
923 	    KM_SLEEP);
924 
925 	header = (ACPI_HEST_HEADER *)(hest + 1);
926 	resid = hest->Header.Length - sizeof(*hest);
927 	for (i = 0; i < n && resid; i++, header = next) {
928 		next = apei_hest_attach_source(sc, header, i, resid);
929 		if (next == NULL) {
930 			aprint_error_dev(sc->sc_dev, "truncated source:"
931 			    " %"PRIu32"\n", i);
932 			break;
933 		}
934 		KASSERT(header < next);
935 		KASSERT((size_t)((const char *)next - (const char *)header) <=
936 		    resid);
937 		resid -= (const char *)next - (const char *)header;
938 	}
939 	if (resid) {
940 		aprint_error_dev(sc->sc_dev, "HEST:"
941 		    " %zu bytes of trailing garbage after %"PRIu32" entries\n",
942 		    resid, n);
943 	}
944 }
945 
946 /*
947  * apei_hest_detach(sc)
948  *
949  *	Stop receiving and processing hardware error notifications and
950  *	free resources set up from the Hardware Error Source Table.
951  */
952 void
953 apei_hest_detach(struct apei_softc *sc)
954 {
955 	ACPI_TABLE_HEST *hest = sc->sc_tab.hest;
956 	struct apei_hest_softc *hsc = &sc->sc_hest;
957 	uint32_t i, n;
958 
959 	if (hsc->hsc_source) {
960 		n = hest->ErrorSourceCount;
961 		for (i = 0; i < n; i++) {
962 			struct apei_source *src = &hsc->hsc_source[i];
963 			ACPI_HEST_HEADER *header = src->as_header;
964 
965 			if (src->as_header == NULL)
966 				continue;
967 			apei_hest_detach_source(sc, header, i);
968 		}
969 		kmem_free(hsc->hsc_source, n * sizeof(hsc->hsc_source[0]));
970 		hsc->hsc_source = NULL;
971 	}
972 }
973 
974 void
975 apei_hed_notify(void)
976 {
977 	device_t apei0;
978 	struct apei_softc *sc;
979 	struct apei_hest_softc *hsc;
980 	struct apei_source *src;
981 
982 	/*
983 	 * Take a reference to the apei0 device so it doesn't go away
984 	 * while we're working.
985 	 */
986 	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL)
987 		goto out;
988 	sc = device_private(apei0);
989 
990 	/*
991 	 * If there's no HEST, nothing to do.
992 	 */
993 	if (sc->sc_tab.hest == NULL)
994 		goto out;
995 	hsc = &sc->sc_hest;
996 
997 	/*
998 	 * Walk through the HED-notified hardware error sources and
999 	 * check them.  The list is stable until we release apei0.
1000 	 */
1001 	SIMPLEQ_FOREACH(src, &hsc->hsc_hed_list, as_entry) {
1002 		ACPI_HEST_HEADER *const header = src->as_header;
1003 
1004 		switch (header->Type) {
1005 		case ACPI_HEST_TYPE_GENERIC_ERROR:
1006 			apei_hest_ghes_handle(sc, src);
1007 			break;
1008 		case ACPI_HEST_TYPE_GENERIC_ERROR_V2:
1009 			apei_hest_ghes_v2_handle(sc, src);
1010 			break;
1011 		case ACPI_HEST_TYPE_IA32_CHECK:
1012 		case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
1013 		case ACPI_HEST_TYPE_IA32_NMI:
1014 		case ACPI_HEST_TYPE_NOT_USED3:
1015 		case ACPI_HEST_TYPE_NOT_USED4:
1016 		case ACPI_HEST_TYPE_NOT_USED5:
1017 		case ACPI_HEST_TYPE_AER_ROOT_PORT:
1018 		case ACPI_HEST_TYPE_AER_ENDPOINT:
1019 		case ACPI_HEST_TYPE_AER_BRIDGE:
1020 //		case ACPI_HEST_TYPE_GENERIC_ERROR:
1021 //		case ACPI_HEST_TYPE_GENERIC_ERROR_V2:
1022 		case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK:
1023 		default:
1024 			/* XXX shouldn't happen */
1025 			break;
1026 		}
1027 	}
1028 
1029 out:	if (apei0) {
1030 		device_release(apei0);
1031 		apei0 = NULL;
1032 	}
1033 }
1034