1*6a2ddc3cSriastradh /* $NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $ */ 24b159fe5Sriastradh 34b159fe5Sriastradh /*- 44b159fe5Sriastradh * Copyright (c) 2024 The NetBSD Foundation, Inc. 54b159fe5Sriastradh * All rights reserved. 64b159fe5Sriastradh * 74b159fe5Sriastradh * Redistribution and use in source and binary forms, with or without 84b159fe5Sriastradh * modification, are permitted provided that the following conditions 94b159fe5Sriastradh * are met: 104b159fe5Sriastradh * 1. Redistributions of source code must retain the above copyright 114b159fe5Sriastradh * notice, this list of conditions and the following disclaimer. 124b159fe5Sriastradh * 2. Redistributions in binary form must reproduce the above copyright 134b159fe5Sriastradh * notice, this list of conditions and the following disclaimer in the 144b159fe5Sriastradh * documentation and/or other materials provided with the distribution. 154b159fe5Sriastradh * 164b159fe5Sriastradh * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 174b159fe5Sriastradh * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 184b159fe5Sriastradh * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 194b159fe5Sriastradh * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 204b159fe5Sriastradh * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 214b159fe5Sriastradh * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 224b159fe5Sriastradh * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 234b159fe5Sriastradh * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 244b159fe5Sriastradh * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 254b159fe5Sriastradh * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 264b159fe5Sriastradh * POSSIBILITY OF SUCH DAMAGE. 274b159fe5Sriastradh */ 284b159fe5Sriastradh 294b159fe5Sriastradh /* 304b159fe5Sriastradh * APEI: ACPI Platform Error Interface 314b159fe5Sriastradh * 324b159fe5Sriastradh * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html 334b159fe5Sriastradh * 344b159fe5Sriastradh * XXX dtrace probes 354b159fe5Sriastradh * 364b159fe5Sriastradh * XXX call _OSC appropriately to announce to the platform that we, the 374b159fe5Sriastradh * OSPM, support APEI 384b159fe5Sriastradh */ 394b159fe5Sriastradh 404b159fe5Sriastradh #include <sys/cdefs.h> 41*6a2ddc3cSriastradh __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $"); 424b159fe5Sriastradh 434b159fe5Sriastradh #include <sys/param.h> 444b159fe5Sriastradh #include <sys/types.h> 454b159fe5Sriastradh 464b159fe5Sriastradh #include <sys/atomic.h> 4770f84647Sriastradh #include <sys/endian.h> 484b159fe5Sriastradh #include <sys/device.h> 494b159fe5Sriastradh #include <sys/module.h> 504b159fe5Sriastradh #include <sys/sysctl.h> 514b159fe5Sriastradh #include <sys/uuid.h> 524b159fe5Sriastradh 534b159fe5Sriastradh #include <dev/acpi/acpireg.h> 544b159fe5Sriastradh #include <dev/acpi/acpivar.h> 554b159fe5Sriastradh #include <dev/acpi/apei_bertvar.h> 564b159fe5Sriastradh #include <dev/acpi/apei_cper.h> 574b159fe5Sriastradh #include <dev/acpi/apei_einjvar.h> 584b159fe5Sriastradh #include <dev/acpi/apei_erstvar.h> 594b159fe5Sriastradh #include <dev/acpi/apei_hestvar.h> 604b159fe5Sriastradh #include <dev/acpi/apei_interp.h> 614b159fe5Sriastradh #include <dev/acpi/apeivar.h> 6270f84647Sriastradh #include <dev/pci/pcireg.h> 634b159fe5Sriastradh 644b159fe5Sriastradh #define _COMPONENT ACPI_RESOURCE_COMPONENT 654b159fe5Sriastradh ACPI_MODULE_NAME ("apei") 664b159fe5Sriastradh 674b159fe5Sriastradh static int apei_match(device_t, cfdata_t, void *); 684b159fe5Sriastradh static void apei_attach(device_t, device_t, void *); 694b159fe5Sriastradh static int apei_detach(device_t, int); 704b159fe5Sriastradh 714b159fe5Sriastradh static void apei_get_tables(struct apei_tab *); 724b159fe5Sriastradh static void apei_put_tables(struct apei_tab *); 734b159fe5Sriastradh 744b159fe5Sriastradh static void apei_identify(struct apei_softc *, const char *, 754b159fe5Sriastradh const ACPI_TABLE_HEADER *); 764b159fe5Sriastradh 774b159fe5Sriastradh CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), 784b159fe5Sriastradh apei_match, apei_attach, apei_detach, NULL); 794b159fe5Sriastradh 804b159fe5Sriastradh static int 814b159fe5Sriastradh apei_match(device_t parent, cfdata_t match, void *aux) 824b159fe5Sriastradh { 834b159fe5Sriastradh struct apei_tab tab; 844b159fe5Sriastradh int prio = 0; 854b159fe5Sriastradh 864b159fe5Sriastradh /* 874b159fe5Sriastradh * If we have any of the APEI tables, match. 884b159fe5Sriastradh */ 894b159fe5Sriastradh apei_get_tables(&tab); 904b159fe5Sriastradh if (tab.bert || tab.einj || tab.erst || tab.hest) 914b159fe5Sriastradh prio = 1; 924b159fe5Sriastradh apei_put_tables(&tab); 934b159fe5Sriastradh 944b159fe5Sriastradh return prio; 954b159fe5Sriastradh } 964b159fe5Sriastradh 974b159fe5Sriastradh static void 984b159fe5Sriastradh apei_attach(device_t parent, device_t self, void *aux) 994b159fe5Sriastradh { 1004b159fe5Sriastradh struct apei_softc *sc = device_private(self); 1014b159fe5Sriastradh const struct sysctlnode *sysctl_hw_acpi; 1024b159fe5Sriastradh int error; 1034b159fe5Sriastradh 1044b159fe5Sriastradh aprint_naive("\n"); 1054b159fe5Sriastradh aprint_normal(": ACPI Platform Error Interface\n"); 1064b159fe5Sriastradh 1074b159fe5Sriastradh pmf_device_register(self, NULL, NULL); 1084b159fe5Sriastradh 1094b159fe5Sriastradh sc->sc_dev = self; 1104b159fe5Sriastradh apei_get_tables(&sc->sc_tab); 1114b159fe5Sriastradh 1124b159fe5Sriastradh /* 1134b159fe5Sriastradh * Get the sysctl hw.acpi node. This should already be created 1144b159fe5Sriastradh * but I don't see an easy way to get at it. If this fails, 1154b159fe5Sriastradh * something is seriously wrong, so let's stop here. 1164b159fe5Sriastradh */ 1174b159fe5Sriastradh error = sysctl_createv(&sc->sc_sysctllog, 0, 1184b159fe5Sriastradh NULL, &sysctl_hw_acpi, 0, 1194b159fe5Sriastradh CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, 1204b159fe5Sriastradh CTL_HW, CTL_CREATE, CTL_EOL); 1214b159fe5Sriastradh if (error) { 1224b159fe5Sriastradh aprint_error_dev(sc->sc_dev, 1234b159fe5Sriastradh "failed to create sysctl hw.acpi: %d\n", error); 1244b159fe5Sriastradh return; 1254b159fe5Sriastradh } 1264b159fe5Sriastradh 1274b159fe5Sriastradh /* 1284b159fe5Sriastradh * Create sysctl hw.acpi.apei. 1294b159fe5Sriastradh */ 1304b159fe5Sriastradh error = sysctl_createv(&sc->sc_sysctllog, 0, 1314b159fe5Sriastradh &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, 1324b159fe5Sriastradh CTLTYPE_NODE, "apei", 1334b159fe5Sriastradh SYSCTL_DESCR("ACPI Platform Error Interface"), 1344b159fe5Sriastradh NULL, 0, NULL, 0, 1354b159fe5Sriastradh CTL_CREATE, CTL_EOL); 1364b159fe5Sriastradh if (error) { 1374b159fe5Sriastradh aprint_error_dev(sc->sc_dev, 1384b159fe5Sriastradh "failed to create sysctl hw.acpi.apei: %d\n", error); 1394b159fe5Sriastradh return; 1404b159fe5Sriastradh } 1414b159fe5Sriastradh 1424b159fe5Sriastradh /* 1434b159fe5Sriastradh * Set up BERT, EINJ, ERST, and HEST. 1444b159fe5Sriastradh */ 1454b159fe5Sriastradh if (sc->sc_tab.bert) { 1464b159fe5Sriastradh apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); 1474b159fe5Sriastradh apei_bert_attach(sc); 1484b159fe5Sriastradh } 1494b159fe5Sriastradh if (sc->sc_tab.einj) { 1504b159fe5Sriastradh apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); 1514b159fe5Sriastradh apei_einj_attach(sc); 1524b159fe5Sriastradh } 1534b159fe5Sriastradh if (sc->sc_tab.erst) { 1544b159fe5Sriastradh apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); 1554b159fe5Sriastradh apei_erst_attach(sc); 1564b159fe5Sriastradh } 1574b159fe5Sriastradh if (sc->sc_tab.hest) { 1584b159fe5Sriastradh apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); 1594b159fe5Sriastradh apei_hest_attach(sc); 1604b159fe5Sriastradh } 1614b159fe5Sriastradh } 1624b159fe5Sriastradh 1634b159fe5Sriastradh static int 1644b159fe5Sriastradh apei_detach(device_t self, int flags) 1654b159fe5Sriastradh { 1664b159fe5Sriastradh struct apei_softc *sc = device_private(self); 1674b159fe5Sriastradh int error; 1684b159fe5Sriastradh 1694b159fe5Sriastradh /* 1704b159fe5Sriastradh * Detach children. We don't currently have any but this is 1714b159fe5Sriastradh * harmless without children and mandatory if we ever sprouted 1724b159fe5Sriastradh * them, so let's just leave it here for good measure. 1734b159fe5Sriastradh * 1744b159fe5Sriastradh * After this point, we are committed to detaching; failure is 1754b159fe5Sriastradh * forbidden. 1764b159fe5Sriastradh */ 1774b159fe5Sriastradh error = config_detach_children(self, flags); 1784b159fe5Sriastradh if (error) 1794b159fe5Sriastradh return error; 1804b159fe5Sriastradh 1814b159fe5Sriastradh /* 1824b159fe5Sriastradh * Tear down all the sysctl nodes first, before the software 1834b159fe5Sriastradh * state backing them goes away. 1844b159fe5Sriastradh */ 1854b159fe5Sriastradh sysctl_teardown(&sc->sc_sysctllog); 1864b159fe5Sriastradh sc->sc_sysctlroot = NULL; 1874b159fe5Sriastradh 1884b159fe5Sriastradh /* 1894b159fe5Sriastradh * Detach the software state for the APEI tables. 1904b159fe5Sriastradh */ 1914b159fe5Sriastradh if (sc->sc_tab.hest) 1924b159fe5Sriastradh apei_hest_detach(sc); 1934b159fe5Sriastradh if (sc->sc_tab.erst) 1944b159fe5Sriastradh apei_erst_detach(sc); 1954b159fe5Sriastradh if (sc->sc_tab.einj) 1964b159fe5Sriastradh apei_einj_detach(sc); 1974b159fe5Sriastradh if (sc->sc_tab.bert) 1984b159fe5Sriastradh apei_bert_detach(sc); 1994b159fe5Sriastradh 2004b159fe5Sriastradh /* 2014b159fe5Sriastradh * Release the APEI tables and we're done. 2024b159fe5Sriastradh */ 2034b159fe5Sriastradh apei_put_tables(&sc->sc_tab); 2044b159fe5Sriastradh pmf_device_deregister(self); 2054b159fe5Sriastradh return 0; 2064b159fe5Sriastradh } 2074b159fe5Sriastradh 2084b159fe5Sriastradh /* 2094b159fe5Sriastradh * apei_get_tables(tab) 2104b159fe5Sriastradh * 2114b159fe5Sriastradh * Get references to whichever APEI-related tables -- BERT, EINJ, 2124b159fe5Sriastradh * ERST, HEST -- are available in the system. 2134b159fe5Sriastradh */ 2144b159fe5Sriastradh static void 2154b159fe5Sriastradh apei_get_tables(struct apei_tab *tab) 2164b159fe5Sriastradh { 2174b159fe5Sriastradh ACPI_STATUS rv; 2184b159fe5Sriastradh 2194b159fe5Sriastradh /* 2204b159fe5Sriastradh * Probe the BERT -- Boot Error Record Table. 2214b159fe5Sriastradh */ 2224b159fe5Sriastradh rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); 2234b159fe5Sriastradh if (ACPI_FAILURE(rv)) 2244b159fe5Sriastradh tab->bert = NULL; 2254b159fe5Sriastradh 2264b159fe5Sriastradh /* 2274b159fe5Sriastradh * Probe the EINJ -- Error Injection Table. 2284b159fe5Sriastradh */ 2294b159fe5Sriastradh rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); 2304b159fe5Sriastradh if (ACPI_FAILURE(rv)) 2314b159fe5Sriastradh tab->einj = NULL; 2324b159fe5Sriastradh 2334b159fe5Sriastradh /* 2344b159fe5Sriastradh * Probe the ERST -- Error Record Serialization Table. 2354b159fe5Sriastradh */ 2364b159fe5Sriastradh rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); 2374b159fe5Sriastradh if (ACPI_FAILURE(rv)) 2384b159fe5Sriastradh tab->erst = NULL; 2394b159fe5Sriastradh 2404b159fe5Sriastradh /* 2414b159fe5Sriastradh * Probe the HEST -- Hardware Error Source Table. 2424b159fe5Sriastradh */ 2434b159fe5Sriastradh rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); 2444b159fe5Sriastradh if (ACPI_FAILURE(rv)) 2454b159fe5Sriastradh tab->hest = NULL; 2464b159fe5Sriastradh } 2474b159fe5Sriastradh 2484b159fe5Sriastradh /* 2494b159fe5Sriastradh * apei_put_tables(tab) 2504b159fe5Sriastradh * 2514b159fe5Sriastradh * Release the tables acquired by apei_get_tables. 2524b159fe5Sriastradh */ 2534b159fe5Sriastradh static void 2544b159fe5Sriastradh apei_put_tables(struct apei_tab *tab) 2554b159fe5Sriastradh { 2564b159fe5Sriastradh 2574b159fe5Sriastradh if (tab->bert != NULL) { 2584b159fe5Sriastradh AcpiPutTable(&tab->bert->Header); 2594b159fe5Sriastradh tab->bert = NULL; 2604b159fe5Sriastradh } 2614b159fe5Sriastradh if (tab->einj != NULL) { 2624b159fe5Sriastradh AcpiPutTable(&tab->einj->Header); 2634b159fe5Sriastradh tab->einj = NULL; 2644b159fe5Sriastradh } 2654b159fe5Sriastradh if (tab->erst != NULL) { 2664b159fe5Sriastradh AcpiPutTable(&tab->erst->Header); 2674b159fe5Sriastradh tab->erst = NULL; 2684b159fe5Sriastradh } 2694b159fe5Sriastradh if (tab->hest != NULL) { 2704b159fe5Sriastradh AcpiPutTable(&tab->hest->Header); 2714b159fe5Sriastradh tab->hest = NULL; 2724b159fe5Sriastradh } 2734b159fe5Sriastradh } 2744b159fe5Sriastradh 2754b159fe5Sriastradh /* 2764b159fe5Sriastradh * apei_identify(sc, name, header) 2774b159fe5Sriastradh * 2784b159fe5Sriastradh * Identify the APEI-related table header for dmesg. 2794b159fe5Sriastradh */ 2804b159fe5Sriastradh static void 2814b159fe5Sriastradh apei_identify(struct apei_softc *sc, const char *name, 2824b159fe5Sriastradh const ACPI_TABLE_HEADER *h) 2834b159fe5Sriastradh { 2844b159fe5Sriastradh 2854b159fe5Sriastradh aprint_normal_dev(sc->sc_dev, "%s:" 2864b159fe5Sriastradh " OemId <%6.6s,%8.8s,%08x>" 2874b159fe5Sriastradh " AslId <%4.4s,%08x>\n", 2884b159fe5Sriastradh name, 2894b159fe5Sriastradh h->OemId, h->OemTableId, h->OemRevision, 2904b159fe5Sriastradh h->AslCompilerId, h->AslCompilerRevision); 2914b159fe5Sriastradh } 2924b159fe5Sriastradh 2934b159fe5Sriastradh /* 2944b159fe5Sriastradh * apei_cper_guid_dec(buf, uuid) 2954b159fe5Sriastradh * 2964b159fe5Sriastradh * Decode a Common Platform Error Record UUID/GUID from an ACPI 2974b159fe5Sriastradh * table at buf into a sys/uuid.h struct uuid. 2984b159fe5Sriastradh */ 2994b159fe5Sriastradh static void 3004b159fe5Sriastradh apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) 3014b159fe5Sriastradh { 3024b159fe5Sriastradh 3034b159fe5Sriastradh uuid_dec_le(buf, uuid); 3044b159fe5Sriastradh } 3054b159fe5Sriastradh 3064b159fe5Sriastradh /* 3074b159fe5Sriastradh * apei_format_guid(uuid, s) 3084b159fe5Sriastradh * 3094b159fe5Sriastradh * Format a UUID as a string. This uses C initializer notation, 31070f5b2bbSrillig * not UUID notation, in order to match the text in the UEFI 3114b159fe5Sriastradh * specification. 3124b159fe5Sriastradh */ 3134b159fe5Sriastradh static void 3144b159fe5Sriastradh apei_format_guid(const struct uuid *uuid, char guidstr[static 69]) 3154b159fe5Sriastradh { 3164b159fe5Sriastradh 3174b159fe5Sriastradh snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x," 3181eecaa72Sriastradh "{0x%02x,%02x," 3191eecaa72Sriastradh "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}", 3204b159fe5Sriastradh uuid->time_low, uuid->time_mid, uuid->time_hi_and_version, 3211eecaa72Sriastradh uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low, 3224b159fe5Sriastradh uuid->node[0], uuid->node[1], uuid->node[2], 3234b159fe5Sriastradh uuid->node[3], uuid->node[4], uuid->node[5]); 3244b159fe5Sriastradh } 3254b159fe5Sriastradh 3264b159fe5Sriastradh /* 3274b159fe5Sriastradh * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 3284b159fe5Sriastradh */ 3294b159fe5Sriastradh 3304b159fe5Sriastradh static const char *const cper_memory_error_type[] = { 3314b159fe5Sriastradh #define F(LN, SN, V) [LN] = #SN, 3324b159fe5Sriastradh CPER_MEMORY_ERROR_TYPES(F) 3334b159fe5Sriastradh #undef F 3344b159fe5Sriastradh }; 3354b159fe5Sriastradh 3364b159fe5Sriastradh /* 3374b159fe5Sriastradh * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block 3384b159fe5Sriastradh * 3394b159fe5Sriastradh * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this 3404b159fe5Sriastradh * but are designated as being intended for Generic Error Data Entries 3414b159fe5Sriastradh * rather than Generic Error Status Blocks. 3424b159fe5Sriastradh */ 3434b159fe5Sriastradh static const char *const apei_gesb_severity[] = { 3444b159fe5Sriastradh [0] = "recoverable", 3454b159fe5Sriastradh [1] = "fatal", 3464b159fe5Sriastradh [2] = "corrected", 3474b159fe5Sriastradh [3] = "none", 3484b159fe5Sriastradh }; 3494b159fe5Sriastradh 3504b159fe5Sriastradh /* 3514b159fe5Sriastradh * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry 3524b159fe5Sriastradh */ 3534b159fe5Sriastradh static const char *const apei_gede_severity[] = { 3544b159fe5Sriastradh [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable", 3554b159fe5Sriastradh [ACPI_HEST_GEN_ERROR_FATAL] = "fatal", 3564b159fe5Sriastradh [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected", 3574b159fe5Sriastradh [ACPI_HEST_GEN_ERROR_NONE] = "none", 3584b159fe5Sriastradh }; 3594b159fe5Sriastradh 3604b159fe5Sriastradh /* 36106ebf6e7Sriastradh * N.2.5. Memory Error Section 36206ebf6e7Sriastradh * 3634b159fe5Sriastradh * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 3644b159fe5Sriastradh */ 3654b159fe5Sriastradh static const struct uuid CPER_MEMORY_ERROR_SECTION = 3664b159fe5Sriastradh {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}}; 3674b159fe5Sriastradh 3684b159fe5Sriastradh static void 3694b159fe5Sriastradh apei_cper_memory_error_report(struct apei_softc *sc, const void *buf, 37009ddc9b2Sriastradh size_t len, const char *ctx, bool ratelimitok) 3714b159fe5Sriastradh { 3724b159fe5Sriastradh const struct cper_memory_error *ME = buf; 3734b159fe5Sriastradh char bitbuf[1024]; 3744b159fe5Sriastradh 37509ddc9b2Sriastradh /* 37609ddc9b2Sriastradh * If we've hit the rate limit, skip printing the error. 37709ddc9b2Sriastradh */ 37809ddc9b2Sriastradh if (!ratelimitok) 37909ddc9b2Sriastradh goto out; 38009ddc9b2Sriastradh 3814b159fe5Sriastradh snprintb(bitbuf, sizeof(bitbuf), 3824b159fe5Sriastradh CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits); 3834b159fe5Sriastradh aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); 3844b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) { 3854b159fe5Sriastradh /* 3864b159fe5Sriastradh * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status 3874b159fe5Sriastradh */ 3884b159fe5Sriastradh /* XXX define this format somewhere */ 3894b159fe5Sriastradh snprintb(bitbuf, sizeof(bitbuf), "\177\020" 3904b159fe5Sriastradh "f\010\010" "ErrorType\0" 3914b159fe5Sriastradh "=\001" "ERR_INTERNAL\0" 3924b159fe5Sriastradh "=\004" "ERR_MEM\0" 3934b159fe5Sriastradh "=\005" "ERR_TLB\0" 3944b159fe5Sriastradh "=\006" "ERR_CACHE\0" 3954b159fe5Sriastradh "=\007" "ERR_FUNCTION\0" 3964b159fe5Sriastradh "=\010" "ERR_SELFTEST\0" 3974b159fe5Sriastradh "=\011" "ERR_FLOW\0" 3984b159fe5Sriastradh "=\020" "ERR_BUS\0" 3994b159fe5Sriastradh "=\021" "ERR_MAP\0" 4004b159fe5Sriastradh "=\022" "ERR_IMPROPER\0" 4014b159fe5Sriastradh "=\023" "ERR_UNIMPL\0" 4024b159fe5Sriastradh "=\024" "ERR_LOL\0" 4034b159fe5Sriastradh "=\025" "ERR_RESPONSE\0" 4044b159fe5Sriastradh "=\026" "ERR_PARITY\0" 4054b159fe5Sriastradh "=\027" "ERR_PROTOCOL\0" 4064b159fe5Sriastradh "=\030" "ERR_ERROR\0" 4074b159fe5Sriastradh "=\031" "ERR_TIMEOUT\0" 4084b159fe5Sriastradh "=\032" "ERR_POISONED\0" 4094b159fe5Sriastradh "b\020" "AddressError\0" 4104b159fe5Sriastradh "b\021" "ControlError\0" 4114b159fe5Sriastradh "b\022" "DataError\0" 4124b159fe5Sriastradh "b\023" "ResponderDetected\0" 4134b159fe5Sriastradh "b\024" "RequesterDetected\0" 4144b159fe5Sriastradh "b\025" "FirstError\0" 4154b159fe5Sriastradh "b\026" "Overflow\0" 4164b159fe5Sriastradh "\0", ME->ErrorStatus); 4174b159fe5Sriastradh device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf); 4184b159fe5Sriastradh } 4194b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) { 4204b159fe5Sriastradh device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n", 4214b159fe5Sriastradh ctx, ME->PhysicalAddress); 4224b159fe5Sriastradh } 4234b159fe5Sriastradh if (ME->ValidationBits & 4244b159fe5Sriastradh CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) { 4254b159fe5Sriastradh device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64 4264b159fe5Sriastradh "\n", ctx, ME->PhysicalAddressMask); 4274b159fe5Sriastradh } 4284b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) { 4294b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx, 4304b159fe5Sriastradh ME->Node); 4314b159fe5Sriastradh } 4324b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) { 4334b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx, 4344b159fe5Sriastradh ME->Card); 4354b159fe5Sriastradh } 4364b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) { 4374b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx, 4384b159fe5Sriastradh ME->Module); 4394b159fe5Sriastradh } 4404b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) { 4414b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx, 4424b159fe5Sriastradh ME->Bank); 4434b159fe5Sriastradh } 4444b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) { 4454b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx, 4464b159fe5Sriastradh ME->Device); 4474b159fe5Sriastradh } 4484b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) { 4494b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx, 4504b159fe5Sriastradh ME->Row); 4514b159fe5Sriastradh } 4524b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) { 4534b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx, 4544b159fe5Sriastradh ME->Column); 4554b159fe5Sriastradh } 4564b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) { 4574b159fe5Sriastradh device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n", 4584b159fe5Sriastradh ctx, ME->BitPosition); 4594b159fe5Sriastradh } 4604b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) { 4614b159fe5Sriastradh device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n", 4624b159fe5Sriastradh ctx, ME->RequestorId); 4634b159fe5Sriastradh } 4644b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) { 4654b159fe5Sriastradh device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n", 4664b159fe5Sriastradh ctx, ME->ResponderId); 4674b159fe5Sriastradh } 4684b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) { 4694b159fe5Sriastradh device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n", 4704b159fe5Sriastradh ctx, ME->TargetId); 4714b159fe5Sriastradh } 4724b159fe5Sriastradh if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) { 4734b159fe5Sriastradh const uint8_t t = ME->MemoryErrorType; 4744b159fe5Sriastradh const char *n = t < __arraycount(cper_memory_error_type) 4754b159fe5Sriastradh ? cper_memory_error_type[t] : NULL; 4764b159fe5Sriastradh 4774b159fe5Sriastradh if (n) { 4784b159fe5Sriastradh device_printf(sc->sc_dev, "%s: MemoryErrorType=%d" 4794b159fe5Sriastradh " (%s)\n", ctx, t, n); 4804b159fe5Sriastradh } else { 4814b159fe5Sriastradh device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n", 4824b159fe5Sriastradh ctx, t); 4834b159fe5Sriastradh } 4844b159fe5Sriastradh } 48509ddc9b2Sriastradh 48609ddc9b2Sriastradh out: /* 48709ddc9b2Sriastradh * XXX pass this through to uvm(9) or userland for decisions 48809ddc9b2Sriastradh * like page retirement 48909ddc9b2Sriastradh */ 49009ddc9b2Sriastradh return; 4914b159fe5Sriastradh } 4924b159fe5Sriastradh 4934b159fe5Sriastradh /* 49470f84647Sriastradh * N.2.7. PCI Express Error Section 49570f84647Sriastradh * 49670f84647Sriastradh * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section 49770f84647Sriastradh */ 49870f84647Sriastradh static const struct uuid CPER_PCIE_ERROR_SECTION = 49970f84647Sriastradh {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}}; 50070f84647Sriastradh 50170f84647Sriastradh static const char *const cper_pcie_error_port_type[] = { 50270f84647Sriastradh #define F(LN, SN, V) [LN] = #SN, 50370f84647Sriastradh CPER_PCIE_ERROR_PORT_TYPES(F) 50470f84647Sriastradh #undef F 50570f84647Sriastradh }; 50670f84647Sriastradh 50770f84647Sriastradh static void 50870f84647Sriastradh apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len, 50970f84647Sriastradh const char *ctx, bool ratelimitok) 51070f84647Sriastradh { 51170f84647Sriastradh const struct cper_pcie_error *PE = buf; 51270f84647Sriastradh char bitbuf[1024]; 51370f84647Sriastradh 51470f84647Sriastradh /* 51570f84647Sriastradh * If we've hit the rate limit, skip printing the error. 51670f84647Sriastradh */ 51770f84647Sriastradh if (!ratelimitok) 51870f84647Sriastradh goto out; 51970f84647Sriastradh 52070f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), 52170f84647Sriastradh CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits); 52270f84647Sriastradh aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); 52370f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) { 52470f84647Sriastradh const uint32_t t = PE->PortType; 52570f84647Sriastradh const char *n = t < __arraycount(cper_pcie_error_port_type) 52670f84647Sriastradh ? cper_pcie_error_port_type[t] : NULL; 52770f84647Sriastradh 52870f84647Sriastradh if (n) { 52970f84647Sriastradh device_printf(sc->sc_dev, "%s: PortType=%"PRIu32 53070f84647Sriastradh " (%s)\n", ctx, t, n); 53170f84647Sriastradh } else { 53270f84647Sriastradh device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n", 53370f84647Sriastradh ctx, t); 53470f84647Sriastradh } 53570f84647Sriastradh } 53670f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) { 53770f84647Sriastradh /* XXX BCD */ 53870f84647Sriastradh device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n", 53970f84647Sriastradh ctx, PE->Version); 54070f84647Sriastradh } 54170f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) { 54270f84647Sriastradh /* XXX move me to pcireg.h */ 54370f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), "\177\020" 54470f84647Sriastradh /* command */ 54570f84647Sriastradh "b\000" "IO_ENABLE\0" 54670f84647Sriastradh "b\001" "MEM_ENABLE\0" 54770f84647Sriastradh "b\002" "MASTER_ENABLE\0" 54870f84647Sriastradh "b\003" "SPECIAL_ENABLE\0" 54970f84647Sriastradh "b\004" "INVALIDATE_ENABLE\0" 55070f84647Sriastradh "b\005" "PALETTE_ENABLE\0" 55170f84647Sriastradh "b\006" "PARITY_ENABLE\0" 55270f84647Sriastradh "b\007" "STEPPING_ENABLE\0" 55370f84647Sriastradh "b\010" "SERR_ENABLE\0" 55470f84647Sriastradh "b\011" "BACKTOBACK_ENABLE\0" 55570f84647Sriastradh "b\012" "INTERRUPT_DISABLE\0" 55670f84647Sriastradh /* status */ 55770f84647Sriastradh "b\023" "INT_STATUS\0" 55870f84647Sriastradh "b\024" "CAPLIST_SUPPORT\0" 55970f84647Sriastradh "b\025" "66MHZ_SUPPORT\0" 56070f84647Sriastradh "b\026" "UDF_SUPPORT\0" 56170f84647Sriastradh "b\027" "BACKTOBACK_SUPPORT\0" 56270f84647Sriastradh "b\030" "PARITY_ERROR\0" 56370f84647Sriastradh "f\031\002" "DEVSEL\0" 56470f84647Sriastradh "=\000" "FAST\0" 56570f84647Sriastradh "=\001" "MEDIUM\0" 56670f84647Sriastradh "=\002" "SLOW\0" 56770f84647Sriastradh "b\033" "TARGET_TARGET_ABORT\0" 56870f84647Sriastradh "b\034" "MASTER_TARGET_ABORT\0" 56970f84647Sriastradh "b\035" "MASTER_ABORT\0" 57070f84647Sriastradh "b\036" "SPECIAL_ERROR\0" 57170f84647Sriastradh "b\037" "PARITY_DETECT\0" 57270f84647Sriastradh "\0", PE->CommandStatus); 57370f84647Sriastradh device_printf(sc->sc_dev, "%s: CommandStatus=%s\n", 57470f84647Sriastradh ctx, bitbuf); 57570f84647Sriastradh } 57670f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) { 57770f84647Sriastradh device_printf(sc->sc_dev, "%s: DeviceID:" 57870f84647Sriastradh " VendorID=0x%04"PRIx16 57970f84647Sriastradh " DeviceID=0x%04"PRIx16 58070f84647Sriastradh " ClassCode=0x%06"PRIx32 58170f84647Sriastradh " Function=%"PRIu8 58270f84647Sriastradh " Device=%"PRIu8 58370f84647Sriastradh " Segment=%"PRIu16 58470f84647Sriastradh " Bus=%"PRIu8 58570f84647Sriastradh " SecondaryBus=%"PRIu8 58670f84647Sriastradh " Slot=0x%04"PRIx16 58770f84647Sriastradh " Reserved0=0x%02"PRIx8 58870f84647Sriastradh "\n", 58970f84647Sriastradh ctx, 59070f84647Sriastradh le16dec(PE->DeviceID.VendorID), 59170f84647Sriastradh le16dec(PE->DeviceID.DeviceID), 59270f84647Sriastradh (PE->DeviceID.ClassCode[0] | /* le24dec */ 59370f84647Sriastradh ((uint32_t)PE->DeviceID.ClassCode[1] << 8) | 59470f84647Sriastradh ((uint32_t)PE->DeviceID.ClassCode[2] << 16)), 59570f84647Sriastradh PE->DeviceID.Function, PE->DeviceID.Device, 59670f84647Sriastradh le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus, 59770f84647Sriastradh PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot), 59870f84647Sriastradh PE->DeviceID.Reserved0); 59970f84647Sriastradh } 60070f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) { 60170f84647Sriastradh device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n", 60270f84647Sriastradh ctx, PE->DeviceSerial); 60370f84647Sriastradh } 60470f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) { 60570f84647Sriastradh /* XXX snprintb */ 60670f84647Sriastradh device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32 60770f84647Sriastradh "\n", ctx, PE->BridgeControlStatus); 60870f84647Sriastradh } 60970f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) { 61070f84647Sriastradh uint32_t dcsr, dsr; 611e06b0061Sriastradh char hex[9*sizeof(PE->CapabilityStructure)/4]; 61270f84647Sriastradh unsigned i; 61370f84647Sriastradh 614e06b0061Sriastradh /* 615e06b0061Sriastradh * Display a hex dump of each 32-bit register in the 616e06b0061Sriastradh * PCIe capability structure. 617e06b0061Sriastradh */ 618e06b0061Sriastradh __CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0); 619e06b0061Sriastradh for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) { 620e06b0061Sriastradh snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ", 621e06b0061Sriastradh le32dec(&PE->CapabilityStructure[4*i])); 62270f84647Sriastradh } 623e06b0061Sriastradh hex[sizeof(hex) - 1] = '\0'; 62470f84647Sriastradh device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n", 62570f84647Sriastradh ctx, hex); 62670f84647Sriastradh 627e06b0061Sriastradh /* 628e06b0061Sriastradh * If the Device Status Register has any bits set, 629e06b0061Sriastradh * highlight it in particular -- these are probably 630e06b0061Sriastradh * error bits. 631e06b0061Sriastradh */ 63270f84647Sriastradh dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]); 63370f84647Sriastradh dsr = __SHIFTOUT(dcsr, __BITS(31,16)); 63470f84647Sriastradh if (dsr != 0) { 63570f84647Sriastradh /* 63670f84647Sriastradh * XXX move me to pcireg.h; note: high 63770f84647Sriastradh * half of DCSR 63870f84647Sriastradh */ 63970f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), "\177\020" 64070f84647Sriastradh "b\000" "CORRECTABLE_ERROR\0" 64170f84647Sriastradh "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0" 64270f84647Sriastradh "b\002" "FATAL_ERROR\0" 64370f84647Sriastradh "b\003" "UNSUPPORTED_REQUEST\0" 64470f84647Sriastradh "b\004" "AUX_POWER\0" 64570f84647Sriastradh "b\005" "TRANSACTIONS_PENDING\0" 64670f84647Sriastradh "\0", dsr); 64770f84647Sriastradh device_printf(sc->sc_dev, "%s: PCIe Device Status:" 64870f84647Sriastradh " %s\n", 64970f84647Sriastradh ctx, bitbuf); 65070f84647Sriastradh } 65170f84647Sriastradh } 65270f84647Sriastradh if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) { 65370f84647Sriastradh uint32_t uc_status, uc_sev; 65470f84647Sriastradh uint32_t cor_status; 65570f84647Sriastradh uint32_t control; 656e06b0061Sriastradh char hex[9*sizeof(PE->AERInfo)/4]; 65770f84647Sriastradh unsigned i; 65870f84647Sriastradh 659e06b0061Sriastradh /* 660e06b0061Sriastradh * Display a hex dump of each 32-bit register in the 661e06b0061Sriastradh * PCIe Advanced Error Reporting extended capability 662e06b0061Sriastradh * structure. 663e06b0061Sriastradh */ 664e06b0061Sriastradh __CTASSERT(sizeof(PE->AERInfo) % 4 == 0); 665e06b0061Sriastradh for (i = 0; i < sizeof(PE->AERInfo)/4; i++) { 666e06b0061Sriastradh snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ", 667e06b0061Sriastradh le32dec(&PE->AERInfo[4*i])); 66870f84647Sriastradh } 669e06b0061Sriastradh hex[sizeof(hex) - 1] = '\0'; 67070f84647Sriastradh device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex); 67170f84647Sriastradh 67270f84647Sriastradh /* XXX move me to pcireg.h */ 67370f84647Sriastradh #define PCI_AER_UC_STATUS_FMT "\177\020" \ 67470f84647Sriastradh "b\000" "UNDEFINED\0" \ 67570f84647Sriastradh "b\004" "DL_PROTOCOL_ERROR\0" \ 67670f84647Sriastradh "b\005" "SURPRISE_DOWN_ERROR\0" \ 67770f84647Sriastradh "b\014" "POISONED_TLP\0" \ 67870f84647Sriastradh "b\015" "FC_PROTOCOL_ERROR\0" \ 67970f84647Sriastradh "b\016" "COMPLETION_TIMEOUT\0" \ 68070f84647Sriastradh "b\017" "COMPLETION_ABORT\0" \ 68170f84647Sriastradh "b\020" "UNEXPECTED_COMPLETION\0" \ 68270f84647Sriastradh "b\021" "RECEIVER_OVERFLOW\0" \ 68370f84647Sriastradh "b\022" "MALFORMED_TLP\0" \ 68470f84647Sriastradh "b\023" "ECRC_ERROR\0" \ 68570f84647Sriastradh "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \ 68670f84647Sriastradh "b\025" "ACS_VIOLATION\0" \ 68770f84647Sriastradh "b\026" "INTERNAL_ERROR\0" \ 68870f84647Sriastradh "b\027" "MC_BLOCKED_TLP\0" \ 68970f84647Sriastradh "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \ 69070f84647Sriastradh "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \ 69170f84647Sriastradh "b\032" "POISONTLP_EGRESS_BLOCKED\0" \ 69270f84647Sriastradh "\0" 69370f84647Sriastradh 694e06b0061Sriastradh /* 695e06b0061Sriastradh * If there are any hardware error status bits set, 696e06b0061Sriastradh * highlight them in particular, in three groups: 697e06b0061Sriastradh * 698e06b0061Sriastradh * - uncorrectable fatal (UC_STATUS and UC_SEVERITY) 699e06b0061Sriastradh * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY) 700e06b0061Sriastradh * - corrected (COR_STATUS) 701e06b0061Sriastradh * 702e06b0061Sriastradh * And if there are any uncorrectable errors, show 703e06b0061Sriastradh * which one was reported first, according to 704e06b0061Sriastradh * CAP_CONTROL. 705e06b0061Sriastradh */ 70670f84647Sriastradh uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]); 70770f84647Sriastradh uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]); 70870f84647Sriastradh cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]); 70970f84647Sriastradh control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]); 71070f84647Sriastradh 71170f84647Sriastradh if (uc_status & uc_sev) { 71270f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 71370f84647Sriastradh uc_status & uc_sev); 71470f84647Sriastradh device_printf(sc->sc_dev, "%s:" 71570f84647Sriastradh " AER hardware fatal uncorrectable errors: %s\n", 71670f84647Sriastradh ctx, bitbuf); 71770f84647Sriastradh } 71870f84647Sriastradh if (uc_status & ~uc_sev) { 71970f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 720*6a2ddc3cSriastradh uc_status & ~uc_sev); 72170f84647Sriastradh device_printf(sc->sc_dev, "%s:" 722*6a2ddc3cSriastradh " AER hardware non-fatal uncorrectable errors:" 723*6a2ddc3cSriastradh " %s\n", 72470f84647Sriastradh ctx, bitbuf); 72570f84647Sriastradh } 72670f84647Sriastradh if (uc_status) { 72770f84647Sriastradh unsigned first = __SHIFTOUT(control, 72870f84647Sriastradh PCI_AER_FIRST_ERROR_PTR); 72970f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 73070f84647Sriastradh (uint32_t)1 << first); 73170f84647Sriastradh device_printf(sc->sc_dev, "%s:" 73270f84647Sriastradh " AER hardware first uncorrectable error: %s\n", 73370f84647Sriastradh ctx, bitbuf); 73470f84647Sriastradh } 73570f84647Sriastradh if (cor_status) { 73670f84647Sriastradh /* XXX move me to pcireg.h */ 73770f84647Sriastradh snprintb(bitbuf, sizeof(bitbuf), "\177\020" 73870f84647Sriastradh "b\000" "RECEIVER_ERROR\0" 73970f84647Sriastradh "b\006" "BAD_TLP\0" 74070f84647Sriastradh "b\007" "BAD_DLLP\0" 74170f84647Sriastradh "b\010" "REPLAY_NUM_ROLLOVER\0" 74270f84647Sriastradh "b\014" "REPLAY_TIMER_TIMEOUT\0" 74370f84647Sriastradh "b\015" "ADVISORY_NF_ERROR\0" 74470f84647Sriastradh "b\016" "INTERNAL_ERROR\0" 74570f84647Sriastradh "b\017" "HEADER_LOG_OVERFLOW\0" 74670f84647Sriastradh "\0", cor_status); 74770f84647Sriastradh device_printf(sc->sc_dev, "%s:" 74870f84647Sriastradh " AER hardware corrected error: %s\n", 74970f84647Sriastradh ctx, bitbuf); 75070f84647Sriastradh } 75170f84647Sriastradh } 75270f84647Sriastradh 75370f84647Sriastradh out: /* 75470f84647Sriastradh * XXX pass this on to the PCI subsystem to handle 75570f84647Sriastradh */ 75670f84647Sriastradh return; 75770f84647Sriastradh } 75870f84647Sriastradh 75970f84647Sriastradh /* 7604b159fe5Sriastradh * apei_cper_reports 7614b159fe5Sriastradh * 7624b159fe5Sriastradh * Table of known Common Platform Error Record types, symbolic 7634b159fe5Sriastradh * names, minimum data lengths, and functions to report them. 7644b159fe5Sriastradh * 7654b159fe5Sriastradh * The section types and corresponding section layouts are listed 7664b159fe5Sriastradh * at: 7674b159fe5Sriastradh * 7684b159fe5Sriastradh * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html 7694b159fe5Sriastradh */ 7704b159fe5Sriastradh static const struct apei_cper_report { 7714b159fe5Sriastradh const char *name; 7724b159fe5Sriastradh const struct uuid *type; 7734b159fe5Sriastradh size_t minlength; 77409ddc9b2Sriastradh void (*func)(struct apei_softc *, const void *, size_t, const char *, 77509ddc9b2Sriastradh bool); 7764b159fe5Sriastradh } apei_cper_reports[] = { 7774b159fe5Sriastradh { "memory", &CPER_MEMORY_ERROR_SECTION, 7784b159fe5Sriastradh sizeof(struct cper_memory_error), 7794b159fe5Sriastradh apei_cper_memory_error_report }, 78070f84647Sriastradh { "PCIe", &CPER_PCIE_ERROR_SECTION, 78170f84647Sriastradh sizeof(struct cper_pcie_error), 78270f84647Sriastradh apei_cper_pcie_error_report }, 7834b159fe5Sriastradh }; 7844b159fe5Sriastradh 7854b159fe5Sriastradh /* 78609ddc9b2Sriastradh * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report) 7874b159fe5Sriastradh * 7884b159fe5Sriastradh * Report the header of the ith Generic Error Data Entry in the 78909ddc9b2Sriastradh * given context, if ratelimitok is true. 7904b159fe5Sriastradh * 7914b159fe5Sriastradh * Return the actual length of the header in headerlen, or 0 if 7924b159fe5Sriastradh * not known because the revision isn't recognized. 7934b159fe5Sriastradh * 7944b159fe5Sriastradh * Return the report type in report, or NULL if not known because 7954b159fe5Sriastradh * the section type isn't recognized. 7964b159fe5Sriastradh */ 7974b159fe5Sriastradh static void 7984b159fe5Sriastradh apei_gede_report_header(struct apei_softc *sc, 79909ddc9b2Sriastradh const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok, 8004b159fe5Sriastradh size_t *headerlenp, const struct apei_cper_report **reportp) 8014b159fe5Sriastradh { 8024b159fe5Sriastradh const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede; 8034b159fe5Sriastradh struct uuid sectype; 8044b159fe5Sriastradh char guidstr[69]; 8054b159fe5Sriastradh char buf[128]; 8064b159fe5Sriastradh unsigned i; 8074b159fe5Sriastradh 8084b159fe5Sriastradh /* 8094b159fe5Sriastradh * Print the section type as a C initializer. It would be 8104b159fe5Sriastradh * prettier to use standard hyphenated UUID notation, but that 8114b159fe5Sriastradh * notation is slightly ambiguous here (two octets could be 8124b159fe5Sriastradh * written either way, depending on Microsoft convention -- 8134b159fe5Sriastradh * which influenced ACPI and UEFI -- or internet convention), 8144b159fe5Sriastradh * and the UEFI spec writes the C initializer notation, so this 8154b159fe5Sriastradh * makes it easier to search for. 8164b159fe5Sriastradh * 8174b159fe5Sriastradh * Also print out a symbolic name, if we know it. 8184b159fe5Sriastradh */ 8194b159fe5Sriastradh apei_cper_guid_dec(gede->SectionType, §ype); 8204b159fe5Sriastradh apei_format_guid(§ype, guidstr); 8214b159fe5Sriastradh for (i = 0; i < __arraycount(apei_cper_reports); i++) { 8224b159fe5Sriastradh const struct apei_cper_report *const report = 8234b159fe5Sriastradh &apei_cper_reports[i]; 8244b159fe5Sriastradh 8254b159fe5Sriastradh if (memcmp(§ype, report->type, sizeof(sectype)) != 0) 8264b159fe5Sriastradh continue; 82709ddc9b2Sriastradh if (ratelimitok) { 82809ddc9b2Sriastradh device_printf(sc->sc_dev, "%s:" 82909ddc9b2Sriastradh " SectionType=%s (%s error)\n", 8304b159fe5Sriastradh ctx, guidstr, report->name); 83109ddc9b2Sriastradh } 8324b159fe5Sriastradh *reportp = report; 8334b159fe5Sriastradh break; 8344b159fe5Sriastradh } 8354b159fe5Sriastradh if (i == __arraycount(apei_cper_reports)) { 83609ddc9b2Sriastradh if (ratelimitok) { 8374b159fe5Sriastradh device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx, 8384b159fe5Sriastradh guidstr); 83909ddc9b2Sriastradh } 8404b159fe5Sriastradh *reportp = NULL; 8414b159fe5Sriastradh } 8424b159fe5Sriastradh 8434b159fe5Sriastradh /* 8444b159fe5Sriastradh * Print the numeric severity and, if we have it, a symbolic 8454b159fe5Sriastradh * name for it. 8464b159fe5Sriastradh */ 84709ddc9b2Sriastradh if (ratelimitok) { 84809ddc9b2Sriastradh device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", 84909ddc9b2Sriastradh ctx, 8504b159fe5Sriastradh gede->ErrorSeverity, 8514b159fe5Sriastradh (gede->ErrorSeverity < __arraycount(apei_gede_severity) 8524b159fe5Sriastradh ? apei_gede_severity[gede->ErrorSeverity] 8534b159fe5Sriastradh : "unknown")); 85409ddc9b2Sriastradh } 8554b159fe5Sriastradh 8564b159fe5Sriastradh /* 8574b159fe5Sriastradh * The Revision may not often be useful, but this is only ever 8584b159fe5Sriastradh * shown at the time of a hardware error report, not something 8594b159fe5Sriastradh * you can glean at your convenience with acpidump. So print 8604b159fe5Sriastradh * it anyway. 8614b159fe5Sriastradh */ 86209ddc9b2Sriastradh if (ratelimitok) { 8634b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx, 8644b159fe5Sriastradh gede->Revision); 86509ddc9b2Sriastradh } 8664b159fe5Sriastradh 8674b159fe5Sriastradh /* 8684b159fe5Sriastradh * Don't touch anything past the Revision until we've 8694b159fe5Sriastradh * determined we understand it. Return the header length to 8704b159fe5Sriastradh * the caller, or return zero -- and stop here -- if we don't 8714b159fe5Sriastradh * know what the actual header length is. 8724b159fe5Sriastradh */ 8734b159fe5Sriastradh if (gede->Revision < 0x0300) { 8744b159fe5Sriastradh *headerlenp = sizeof(*gede); 8754b159fe5Sriastradh } else if (gede->Revision < 0x0400) { 8764b159fe5Sriastradh *headerlenp = sizeof(*gede_v3); 8774b159fe5Sriastradh } else { 8784b159fe5Sriastradh *headerlenp = 0; 8794b159fe5Sriastradh return; 8804b159fe5Sriastradh } 8814b159fe5Sriastradh 8824b159fe5Sriastradh /* 8834b159fe5Sriastradh * Print the validation bits at debug level. Only really 8844b159fe5Sriastradh * helpful if there are bits we _don't_ know about. 8854b159fe5Sriastradh */ 88609ddc9b2Sriastradh if (ratelimitok) { 8874b159fe5Sriastradh /* XXX define this format somewhere */ 8884b159fe5Sriastradh snprintb(buf, sizeof(buf), "\177\020" 8894b159fe5Sriastradh "b\000" "FRU_ID\0" 8904b159fe5Sriastradh "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */ 8914b159fe5Sriastradh "b\002" "TIMESTAMP\0" 8924b159fe5Sriastradh "\0", gede->ValidationBits); 89309ddc9b2Sriastradh aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, 89409ddc9b2Sriastradh buf); 89509ddc9b2Sriastradh } 8964b159fe5Sriastradh 8974b159fe5Sriastradh /* 8984b159fe5Sriastradh * Print the CPER section flags. 8994b159fe5Sriastradh */ 90009ddc9b2Sriastradh if (ratelimitok) { 90109ddc9b2Sriastradh snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, 90209ddc9b2Sriastradh gede->Flags); 9034b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf); 90409ddc9b2Sriastradh } 9054b159fe5Sriastradh 9064b159fe5Sriastradh /* 9074b159fe5Sriastradh * The ErrorDataLength is unlikely to be useful for the log, so 9084b159fe5Sriastradh * print it at debug level only. 9094b159fe5Sriastradh */ 91009ddc9b2Sriastradh if (ratelimitok) { 91109ddc9b2Sriastradh aprint_debug_dev(sc->sc_dev, "%s:" 91209ddc9b2Sriastradh " ErrorDataLength=0x%"PRIu32"\n", 9134b159fe5Sriastradh ctx, gede->ErrorDataLength); 91409ddc9b2Sriastradh } 9154b159fe5Sriastradh 9164b159fe5Sriastradh /* 9174b159fe5Sriastradh * Print the FRU Id and text, if available. 9184b159fe5Sriastradh */ 91909ddc9b2Sriastradh if (ratelimitok && 92009ddc9b2Sriastradh (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) { 9214b159fe5Sriastradh struct uuid fruid; 9224b159fe5Sriastradh 9234b159fe5Sriastradh apei_cper_guid_dec(gede->FruId, &fruid); 9244b159fe5Sriastradh apei_format_guid(&fruid, guidstr); 9254b159fe5Sriastradh device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr); 9264b159fe5Sriastradh } 92709ddc9b2Sriastradh if (ratelimitok && 92809ddc9b2Sriastradh (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) { 9294b159fe5Sriastradh device_printf(sc->sc_dev, "%s: FruText=%.20s\n", 9304b159fe5Sriastradh ctx, gede->FruText); 9314b159fe5Sriastradh } 9324b159fe5Sriastradh 9334b159fe5Sriastradh /* 9344b159fe5Sriastradh * Print the timestamp, if available by the revision number and 9354b159fe5Sriastradh * the validation bits. 9364b159fe5Sriastradh */ 93709ddc9b2Sriastradh if (ratelimitok && 93809ddc9b2Sriastradh gede->Revision >= 0x0300 && gede->Revision < 0x0400 && 9394b159fe5Sriastradh gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 9404b159fe5Sriastradh const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp; 9414b159fe5Sriastradh const uint8_t s = t[0]; 9424b159fe5Sriastradh const uint8_t m = t[1]; 9434b159fe5Sriastradh const uint8_t h = t[2]; 9444b159fe5Sriastradh const uint8_t f = t[3]; 9454b159fe5Sriastradh const uint8_t D = t[4]; 9464b159fe5Sriastradh const uint8_t M = t[5]; 9474b159fe5Sriastradh const uint8_t Y = t[6]; 9484b159fe5Sriastradh const uint8_t C = t[7]; 9494b159fe5Sriastradh 9504b159fe5Sriastradh device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64 9514b159fe5Sriastradh " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n", 9524b159fe5Sriastradh ctx, gede_v3->TimeStamp, 9534b159fe5Sriastradh C,Y, M, D, h,m,s, 9544b159fe5Sriastradh f & __BIT(0) ? " (event time)" : " (collect time)"); 9554b159fe5Sriastradh } 9564b159fe5Sriastradh } 9574b159fe5Sriastradh 9584b159fe5Sriastradh /* 95909ddc9b2Sriastradh * apei_gesb_ratelimit 96009ddc9b2Sriastradh * 96109ddc9b2Sriastradh * State to limit the rate of console log messages about hardware 96209ddc9b2Sriastradh * errors. For each of the four severity levels in a Generic 96309ddc9b2Sriastradh * Error Status Block, 96409ddc9b2Sriastradh * 96509ddc9b2Sriastradh * 0 - Recoverable (uncorrectable), 96609ddc9b2Sriastradh * 1 - Fatal (uncorrectable), 96709ddc9b2Sriastradh * 2 - Corrected, and 96809ddc9b2Sriastradh * 3 - None (including ill-formed errors), 96909ddc9b2Sriastradh * 97009ddc9b2Sriastradh * we record the last time it happened, protected by a CPU simple 97109ddc9b2Sriastradh * lock that we only try-acquire so it is safe to use in any 97209ddc9b2Sriastradh * context, including non-maskable interrupt context. 97309ddc9b2Sriastradh */ 97409ddc9b2Sriastradh 97509ddc9b2Sriastradh static struct { 97609ddc9b2Sriastradh __cpu_simple_lock_t lock; 97709ddc9b2Sriastradh struct timeval lasttime; 97809ddc9b2Sriastradh volatile uint32_t suppressed; 97909ddc9b2Sriastradh } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = { 98009ddc9b2Sriastradh [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED }, 98109ddc9b2Sriastradh [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED }, 98209ddc9b2Sriastradh [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED }, 98309ddc9b2Sriastradh [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED }, 98409ddc9b2Sriastradh }; 98509ddc9b2Sriastradh 98609ddc9b2Sriastradh static void 98709ddc9b2Sriastradh atomic_incsat_32(volatile uint32_t *p) 98809ddc9b2Sriastradh { 98909ddc9b2Sriastradh uint32_t o, n; 99009ddc9b2Sriastradh 99109ddc9b2Sriastradh do { 99209ddc9b2Sriastradh o = atomic_load_relaxed(p); 99309ddc9b2Sriastradh if (__predict_false(o == UINT_MAX)) 99409ddc9b2Sriastradh return; 99509ddc9b2Sriastradh n = o + 1; 99609ddc9b2Sriastradh } while (__predict_false(atomic_cas_32(p, o, n) != o)); 99709ddc9b2Sriastradh } 99809ddc9b2Sriastradh 99909ddc9b2Sriastradh /* 100009ddc9b2Sriastradh * apei_gesb_ratecheck(sc, severity, suppressed) 100109ddc9b2Sriastradh * 100209ddc9b2Sriastradh * Check for a rate limit on errors of the specified severity. 100309ddc9b2Sriastradh * 100409ddc9b2Sriastradh * => Return true if the error should be printed, and format into 100509ddc9b2Sriastradh * the buffer suppressed a message saying how many errors were 100609ddc9b2Sriastradh * previously suppressed. 100709ddc9b2Sriastradh * 100809ddc9b2Sriastradh * => Return false if the error should be suppressed because the 100909ddc9b2Sriastradh * last one printed was too recent. 101009ddc9b2Sriastradh */ 101109ddc9b2Sriastradh static bool 101209ddc9b2Sriastradh apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity, 101309ddc9b2Sriastradh char suppressed[static sizeof(" (4294967295 or more errors suppressed)")]) 101409ddc9b2Sriastradh { 101509ddc9b2Sriastradh /* one of each type per minute (XXX worth making configurable?) */ 101609ddc9b2Sriastradh const struct timeval mininterval = {60, 0}; 101709ddc9b2Sriastradh unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */ 101809ddc9b2Sriastradh bool ok = false; 101909ddc9b2Sriastradh 102009ddc9b2Sriastradh /* 102109ddc9b2Sriastradh * If the lock is contended, the rate limit is probably 102209ddc9b2Sriastradh * exceeded, so it's not OK to print. 102309ddc9b2Sriastradh * 102409ddc9b2Sriastradh * Otherwise, with the lock held, ask ratecheck(9) whether it's 102509ddc9b2Sriastradh * OK to print. 102609ddc9b2Sriastradh */ 102709ddc9b2Sriastradh if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock)) 102809ddc9b2Sriastradh goto out; 102909ddc9b2Sriastradh ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval); 103009ddc9b2Sriastradh __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock); 103109ddc9b2Sriastradh 103209ddc9b2Sriastradh out: /* 103309ddc9b2Sriastradh * If it's OK to print, report the number of errors that were 103409ddc9b2Sriastradh * suppressed. If it's not OK to print, count a suppressed 103509ddc9b2Sriastradh * error. 103609ddc9b2Sriastradh */ 103709ddc9b2Sriastradh if (ok) { 103809ddc9b2Sriastradh const uint32_t n = 103909ddc9b2Sriastradh atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0); 104009ddc9b2Sriastradh 104109ddc9b2Sriastradh if (n == 0) { 104209ddc9b2Sriastradh suppressed[0] = '\0'; 104309ddc9b2Sriastradh } else { 104409ddc9b2Sriastradh snprintf(suppressed, 104509ddc9b2Sriastradh sizeof(" (4294967295 or more errors suppressed)"), 104609ddc9b2Sriastradh " (%u%s error%s suppressed)", 104709ddc9b2Sriastradh n, 104809ddc9b2Sriastradh n == UINT32_MAX ? " or more" : "", 104909ddc9b2Sriastradh n == 1 ? "" : "s"); 105009ddc9b2Sriastradh } 105109ddc9b2Sriastradh } else { 105209ddc9b2Sriastradh atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed); 105309ddc9b2Sriastradh suppressed[0] = '\0'; 105409ddc9b2Sriastradh } 105509ddc9b2Sriastradh return ok; 105609ddc9b2Sriastradh } 105709ddc9b2Sriastradh 105809ddc9b2Sriastradh /* 10594b159fe5Sriastradh * apei_gesb_report(sc, gesb, size, ctx) 10604b159fe5Sriastradh * 10614b159fe5Sriastradh * Check a Generic Error Status Block, of at most the specified 10624b159fe5Sriastradh * size in bytes, and report any errors in it. Return the 32-bit 10634b159fe5Sriastradh * Block Status in case the caller needs it to acknowledge the 10644b159fe5Sriastradh * report to firmware. 10654b159fe5Sriastradh */ 10664b159fe5Sriastradh uint32_t 10674b159fe5Sriastradh apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb, 10684b159fe5Sriastradh size_t size, const char *ctx, bool *fatalp) 10694b159fe5Sriastradh { 10704b159fe5Sriastradh uint32_t status, unknownstatus, severity, nentries, i; 10714b159fe5Sriastradh uint32_t datalen, rawdatalen; 10724b159fe5Sriastradh const ACPI_HEST_GENERIC_DATA *gede0, *gede; 10734b159fe5Sriastradh const unsigned char *rawdata; 107409ddc9b2Sriastradh bool ratelimitok = false; 107509ddc9b2Sriastradh char suppressed[sizeof(" (4294967295 or more errors suppressed)")]; 10764b159fe5Sriastradh bool fatal = false; 10774b159fe5Sriastradh 10784b159fe5Sriastradh /* 10794b159fe5Sriastradh * Verify the buffer is large enough for a Generic Error Status 10804b159fe5Sriastradh * Block before we try to touch anything in it. 10814b159fe5Sriastradh */ 10824b159fe5Sriastradh if (size < sizeof(*gesb)) { 108309ddc9b2Sriastradh ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE, 108409ddc9b2Sriastradh suppressed); 108509ddc9b2Sriastradh if (ratelimitok) { 108609ddc9b2Sriastradh device_printf(sc->sc_dev, 108709ddc9b2Sriastradh "%s: truncated GESB, %zu < %zu%s\n", 108809ddc9b2Sriastradh ctx, size, sizeof(*gesb), suppressed); 108909ddc9b2Sriastradh } 1090c8d23cf5Sriastradh status = 0; 1091c8d23cf5Sriastradh goto out; 10924b159fe5Sriastradh } 10934b159fe5Sriastradh size -= sizeof(*gesb); 10944b159fe5Sriastradh 10954b159fe5Sriastradh /* 10964b159fe5Sriastradh * Load the status. Access ordering rules are unclear in the 10974b159fe5Sriastradh * ACPI specification; I'm guessing that load-acquire of the 10984b159fe5Sriastradh * block status is a good idea before any other access to the 10994b159fe5Sriastradh * GESB. 11004b159fe5Sriastradh */ 11014b159fe5Sriastradh status = atomic_load_acquire(&gesb->BlockStatus); 11024b159fe5Sriastradh 11034b159fe5Sriastradh /* 11044b159fe5Sriastradh * If there are no status bits set, the rest of the GESB is 11054b159fe5Sriastradh * garbage, so stop here. 11064b159fe5Sriastradh */ 11074b159fe5Sriastradh if (status == 0) { 11084b159fe5Sriastradh /* XXX dtrace */ 11094b159fe5Sriastradh /* XXX DPRINTF */ 11104b159fe5Sriastradh goto out; 11114b159fe5Sriastradh } 11124b159fe5Sriastradh 111309ddc9b2Sriastradh /* 111409ddc9b2Sriastradh * Read out the severity and get the number of entries in this 111509ddc9b2Sriastradh * status block. 111609ddc9b2Sriastradh */ 111709ddc9b2Sriastradh severity = gesb->ErrorSeverity; 111809ddc9b2Sriastradh nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT); 111909ddc9b2Sriastradh 112009ddc9b2Sriastradh /* 112109ddc9b2Sriastradh * Print a message to the console and dmesg about the severity 112209ddc9b2Sriastradh * of the error. 112309ddc9b2Sriastradh */ 112409ddc9b2Sriastradh ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed); 112509ddc9b2Sriastradh if (ratelimitok) { 112609ddc9b2Sriastradh char statusbuf[128]; 112709ddc9b2Sriastradh 11284b159fe5Sriastradh /* XXX define this format somewhere */ 11294b159fe5Sriastradh snprintb(statusbuf, sizeof(statusbuf), "\177\020" 11304b159fe5Sriastradh "b\000" "UE\0" 11314b159fe5Sriastradh "b\001" "CE\0" 11324b159fe5Sriastradh "b\002" "MULTI_UE\0" 11334b159fe5Sriastradh "b\003" "MULTI_CE\0" 11344b159fe5Sriastradh "f\004\010" "GEDE_COUNT\0" 11354b159fe5Sriastradh "\0", status); 11364b159fe5Sriastradh 11374b159fe5Sriastradh if (severity < __arraycount(apei_gesb_severity)) { 113809ddc9b2Sriastradh device_printf(sc->sc_dev, "%s" 113909ddc9b2Sriastradh " reported hardware error%s:" 11404b159fe5Sriastradh " severity=%s nentries=%u status=%s\n", 114109ddc9b2Sriastradh ctx, suppressed, 114209ddc9b2Sriastradh apei_gesb_severity[severity], nentries, statusbuf); 11434b159fe5Sriastradh } else { 114409ddc9b2Sriastradh device_printf(sc->sc_dev, "%s reported error%s:" 11454b159fe5Sriastradh " severity=%"PRIu32" nentries=%u status=%s\n", 114609ddc9b2Sriastradh ctx, suppressed, 114709ddc9b2Sriastradh severity, nentries, statusbuf); 114809ddc9b2Sriastradh } 11494b159fe5Sriastradh } 11504b159fe5Sriastradh 11514b159fe5Sriastradh /* 11524b159fe5Sriastradh * Make a determination about whether the error is fatal. 11534b159fe5Sriastradh * 11544b159fe5Sriastradh * XXX Currently we don't have any mechanism to recover from 11554b159fe5Sriastradh * uncorrectable but recoverable errors, so we treat those -- 11564b159fe5Sriastradh * and anything else we don't recognize -- as fatal. 11574b159fe5Sriastradh */ 11584b159fe5Sriastradh switch (severity) { 11594b159fe5Sriastradh case ACPI_HEST_GEN_ERROR_CORRECTED: 11604b159fe5Sriastradh case ACPI_HEST_GEN_ERROR_NONE: 11614b159fe5Sriastradh fatal = false; 11624b159fe5Sriastradh break; 11634b159fe5Sriastradh case ACPI_HEST_GEN_ERROR_FATAL: 11644b159fe5Sriastradh case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */ 11654b159fe5Sriastradh default: 11664b159fe5Sriastradh fatal = true; 11674b159fe5Sriastradh break; 11684b159fe5Sriastradh } 11694b159fe5Sriastradh 11704b159fe5Sriastradh /* 11714b159fe5Sriastradh * Clear the bits we know about to warn if there's anything 11724b159fe5Sriastradh * left we don't understand. 11734b159fe5Sriastradh */ 11744b159fe5Sriastradh unknownstatus = status; 11754b159fe5Sriastradh unknownstatus &= ~ACPI_HEST_UNCORRECTABLE; 11764b159fe5Sriastradh unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE; 11774b159fe5Sriastradh unknownstatus &= ~ACPI_HEST_CORRECTABLE; 11784b159fe5Sriastradh unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE; 11794b159fe5Sriastradh unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT; 118009ddc9b2Sriastradh if (ratelimitok && unknownstatus != 0) { 11814b159fe5Sriastradh /* XXX dtrace */ 11824b159fe5Sriastradh device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:" 11834b159fe5Sriastradh " 0x%"PRIx32"\n", ctx, unknownstatus); 11844b159fe5Sriastradh } 11854b159fe5Sriastradh 11864b159fe5Sriastradh /* 11874b159fe5Sriastradh * Advance past the Generic Error Status Block (GESB) header to 11884b159fe5Sriastradh * the Generic Error Data Entries (GEDEs). 11894b159fe5Sriastradh */ 11904b159fe5Sriastradh gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1); 11914b159fe5Sriastradh 11924b159fe5Sriastradh /* 11934b159fe5Sriastradh * Verify that the data length (GEDEs) fits within the size. 11944b159fe5Sriastradh * If not, truncate the GEDEs. 11954b159fe5Sriastradh */ 11964b159fe5Sriastradh datalen = gesb->DataLength; 11974b159fe5Sriastradh if (size < datalen) { 119809ddc9b2Sriastradh if (ratelimitok) { 11994b159fe5Sriastradh device_printf(sc->sc_dev, "%s:" 120009ddc9b2Sriastradh " GESB DataLength exceeds bounds:" 120109ddc9b2Sriastradh " %zu < %"PRIu32"\n", 12024b159fe5Sriastradh ctx, size, datalen); 120309ddc9b2Sriastradh } 12044b159fe5Sriastradh datalen = size; 12054b159fe5Sriastradh } 12064b159fe5Sriastradh size -= datalen; 12074b159fe5Sriastradh 12084b159fe5Sriastradh /* 12094b159fe5Sriastradh * Report each of the Generic Error Data Entries. 12104b159fe5Sriastradh */ 12114b159fe5Sriastradh for (i = 0; i < nentries; i++) { 12124b159fe5Sriastradh size_t headerlen; 12134b159fe5Sriastradh const struct apei_cper_report *report; 12144b159fe5Sriastradh char subctx[128]; 12154b159fe5Sriastradh 12164b159fe5Sriastradh /* 12174b159fe5Sriastradh * Format a subcontext to show this numbered entry of 12184b159fe5Sriastradh * the GESB. 12194b159fe5Sriastradh */ 12204b159fe5Sriastradh snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i); 12214b159fe5Sriastradh 12224b159fe5Sriastradh /* 12234b159fe5Sriastradh * If the remaining GESB data length isn't enough for a 12244b159fe5Sriastradh * GEDE header, stop here. 12254b159fe5Sriastradh */ 12264b159fe5Sriastradh if (datalen < sizeof(*gede)) { 122709ddc9b2Sriastradh if (ratelimitok) { 12284b159fe5Sriastradh device_printf(sc->sc_dev, "%s:" 12294b159fe5Sriastradh " truncated GEDE: %"PRIu32" < %zu bytes\n", 12304b159fe5Sriastradh subctx, datalen, sizeof(*gede)); 123109ddc9b2Sriastradh } 12324b159fe5Sriastradh break; 12334b159fe5Sriastradh } 12344b159fe5Sriastradh 12354b159fe5Sriastradh /* 12364b159fe5Sriastradh * Print the GEDE header and get the full length (may 12374b159fe5Sriastradh * vary from revision to revision of the GEDE) and the 12384b159fe5Sriastradh * CPER report function if possible. 12394b159fe5Sriastradh */ 124009ddc9b2Sriastradh apei_gede_report_header(sc, gede, subctx, ratelimitok, 12414b159fe5Sriastradh &headerlen, &report); 12424b159fe5Sriastradh 12434b159fe5Sriastradh /* 12444b159fe5Sriastradh * If we don't know the header length because of an 12454b159fe5Sriastradh * unfamiliar revision, stop here. 12464b159fe5Sriastradh */ 12474b159fe5Sriastradh if (headerlen == 0) { 124809ddc9b2Sriastradh if (ratelimitok) { 12494b159fe5Sriastradh device_printf(sc->sc_dev, "%s:" 12504b159fe5Sriastradh " unknown revision: 0x%"PRIx16"\n", 12514b159fe5Sriastradh subctx, gede->Revision); 125209ddc9b2Sriastradh } 12534b159fe5Sriastradh break; 12544b159fe5Sriastradh } 12554b159fe5Sriastradh 12564b159fe5Sriastradh /* 12574b159fe5Sriastradh * Stop here if what we mapped is too small for the 12584b159fe5Sriastradh * error data length. 12594b159fe5Sriastradh */ 12604b159fe5Sriastradh datalen -= headerlen; 12614b159fe5Sriastradh if (datalen < gede->ErrorDataLength) { 126209ddc9b2Sriastradh if (ratelimitok) { 126309ddc9b2Sriastradh device_printf(sc->sc_dev, "%s:" 126409ddc9b2Sriastradh " truncated GEDE payload:" 12654b159fe5Sriastradh " %"PRIu32" < %"PRIu32" bytes\n", 12664b159fe5Sriastradh subctx, datalen, gede->ErrorDataLength); 126709ddc9b2Sriastradh } 12684b159fe5Sriastradh break; 12694b159fe5Sriastradh } 12704b159fe5Sriastradh 12714b159fe5Sriastradh /* 12724b159fe5Sriastradh * Report the Common Platform Error Record appendix to 12734b159fe5Sriastradh * this Generic Error Data Entry. 12744b159fe5Sriastradh */ 12754b159fe5Sriastradh if (report == NULL) { 127609ddc9b2Sriastradh if (ratelimitok) { 127709ddc9b2Sriastradh device_printf(sc->sc_dev, "%s:" 127809ddc9b2Sriastradh " [unknown type]\n", ctx); 127909ddc9b2Sriastradh } 12804b159fe5Sriastradh } else { 128109ddc9b2Sriastradh /* XXX pass ratelimit through */ 12824b159fe5Sriastradh (*report->func)(sc, (const char *)gede + headerlen, 128309ddc9b2Sriastradh gede->ErrorDataLength, subctx, ratelimitok); 12844b159fe5Sriastradh } 12854b159fe5Sriastradh 12864b159fe5Sriastradh /* 12874b159fe5Sriastradh * Advance past the GEDE header and CPER data to the 12884b159fe5Sriastradh * next GEDE. 12894b159fe5Sriastradh */ 12904b159fe5Sriastradh gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede + 12914b159fe5Sriastradh + headerlen + gede->ErrorDataLength); 12924b159fe5Sriastradh } 12934b159fe5Sriastradh 12944b159fe5Sriastradh /* 12954b159fe5Sriastradh * Advance past the Generic Error Data Entries (GEDEs) to the 12964b159fe5Sriastradh * raw error data. 12974b159fe5Sriastradh * 12984b159fe5Sriastradh * XXX Provide Max Raw Data Length as a parameter, as found in 12994b159fe5Sriastradh * various HEST entry types. 13004b159fe5Sriastradh */ 13014b159fe5Sriastradh rawdata = (const unsigned char *)gede0 + datalen; 13024b159fe5Sriastradh 13034b159fe5Sriastradh /* 13044b159fe5Sriastradh * Verify that the raw data length fits within the size. If 13054b159fe5Sriastradh * not, truncate the raw data. 13064b159fe5Sriastradh */ 13074b159fe5Sriastradh rawdatalen = gesb->RawDataLength; 13084b159fe5Sriastradh if (size < rawdatalen) { 130909ddc9b2Sriastradh if (ratelimitok) { 13104b159fe5Sriastradh device_printf(sc->sc_dev, "%s:" 131109ddc9b2Sriastradh " GESB RawDataLength exceeds bounds:" 131209ddc9b2Sriastradh " %zu < %"PRIu32"\n", 13134b159fe5Sriastradh ctx, size, rawdatalen); 131409ddc9b2Sriastradh } 13154b159fe5Sriastradh rawdatalen = size; 13164b159fe5Sriastradh } 13174b159fe5Sriastradh size -= rawdatalen; 13184b159fe5Sriastradh 13194b159fe5Sriastradh /* 13204b159fe5Sriastradh * Hexdump the raw data, if any. 13214b159fe5Sriastradh */ 132209ddc9b2Sriastradh if (ratelimitok && rawdatalen > 0) { 13234b159fe5Sriastradh char devctx[128]; 13244b159fe5Sriastradh 13254b159fe5Sriastradh snprintf(devctx, sizeof(devctx), "%s: %s: raw data", 13264b159fe5Sriastradh device_xname(sc->sc_dev), ctx); 13274b159fe5Sriastradh hexdump(printf, devctx, rawdata, rawdatalen); 13284b159fe5Sriastradh } 13294b159fe5Sriastradh 13304b159fe5Sriastradh /* 13314b159fe5Sriastradh * If there's anything left after the raw data, warn. 13324b159fe5Sriastradh */ 133309ddc9b2Sriastradh if (ratelimitok && size > 0) { 13344b159fe5Sriastradh device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n", 13354b159fe5Sriastradh ctx, size); 13364b159fe5Sriastradh } 13374b159fe5Sriastradh 13384b159fe5Sriastradh /* 13394b159fe5Sriastradh * Return the status so the caller can ack it, and tell the 13404b159fe5Sriastradh * caller whether this error is fatal. 13414b159fe5Sriastradh */ 13424b159fe5Sriastradh out: *fatalp = fatal; 13434b159fe5Sriastradh return status; 13444b159fe5Sriastradh } 13454b159fe5Sriastradh 13464b159fe5Sriastradh MODULE(MODULE_CLASS_DRIVER, apei, NULL); 13474b159fe5Sriastradh 13484b159fe5Sriastradh #ifdef _MODULE 13494b159fe5Sriastradh #include "ioconf.c" 13504b159fe5Sriastradh #endif 13514b159fe5Sriastradh 13524b159fe5Sriastradh static int 13534b159fe5Sriastradh apei_modcmd(modcmd_t cmd, void *opaque) 13544b159fe5Sriastradh { 13554b159fe5Sriastradh int error = 0; 13564b159fe5Sriastradh 13574b159fe5Sriastradh switch (cmd) { 13584b159fe5Sriastradh case MODULE_CMD_INIT: 13594b159fe5Sriastradh #ifdef _MODULE 13604b159fe5Sriastradh error = config_init_component(cfdriver_ioconf_apei, 13614b159fe5Sriastradh cfattach_ioconf_apei, cfdata_ioconf_apei); 13624b159fe5Sriastradh #endif 13634b159fe5Sriastradh return error; 13644b159fe5Sriastradh case MODULE_CMD_FINI: 13654b159fe5Sriastradh #ifdef _MODULE 13664b159fe5Sriastradh error = config_fini_component(cfdriver_ioconf_apei, 13674b159fe5Sriastradh cfattach_ioconf_apei, cfdata_ioconf_apei); 13684b159fe5Sriastradh #endif 13694b159fe5Sriastradh return error; 13704b159fe5Sriastradh default: 13714b159fe5Sriastradh return ENOTTY; 13724b159fe5Sriastradh } 13734b159fe5Sriastradh } 1374