1 /* $NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2024 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * APEI: ACPI Platform Error Interface 31 * 32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html 33 * 34 * XXX dtrace probes 35 * 36 * XXX call _OSC appropriately to announce to the platform that we, the 37 * OSPM, support APEI 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $"); 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 46 #include <sys/atomic.h> 47 #include <sys/endian.h> 48 #include <sys/device.h> 49 #include <sys/module.h> 50 #include <sys/sysctl.h> 51 #include <sys/uuid.h> 52 53 #include <dev/acpi/acpireg.h> 54 #include <dev/acpi/acpivar.h> 55 #include <dev/acpi/apei_bertvar.h> 56 #include <dev/acpi/apei_cper.h> 57 #include <dev/acpi/apei_einjvar.h> 58 #include <dev/acpi/apei_erstvar.h> 59 #include <dev/acpi/apei_hestvar.h> 60 #include <dev/acpi/apei_interp.h> 61 #include <dev/acpi/apeivar.h> 62 #include <dev/pci/pcireg.h> 63 64 #define _COMPONENT ACPI_RESOURCE_COMPONENT 65 ACPI_MODULE_NAME ("apei") 66 67 static int apei_match(device_t, cfdata_t, void *); 68 static void apei_attach(device_t, device_t, void *); 69 static int apei_detach(device_t, int); 70 71 static void apei_get_tables(struct apei_tab *); 72 static void apei_put_tables(struct apei_tab *); 73 74 static void apei_identify(struct apei_softc *, const char *, 75 const ACPI_TABLE_HEADER *); 76 77 CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), 78 apei_match, apei_attach, apei_detach, NULL); 79 80 static int 81 apei_match(device_t parent, cfdata_t match, void *aux) 82 { 83 struct apei_tab tab; 84 int prio = 0; 85 86 /* 87 * If we have any of the APEI tables, match. 88 */ 89 apei_get_tables(&tab); 90 if (tab.bert || tab.einj || tab.erst || tab.hest) 91 prio = 1; 92 apei_put_tables(&tab); 93 94 return prio; 95 } 96 97 static void 98 apei_attach(device_t parent, device_t self, void *aux) 99 { 100 struct apei_softc *sc = device_private(self); 101 const struct sysctlnode *sysctl_hw_acpi; 102 int error; 103 104 aprint_naive("\n"); 105 aprint_normal(": ACPI Platform Error Interface\n"); 106 107 pmf_device_register(self, NULL, NULL); 108 109 sc->sc_dev = self; 110 apei_get_tables(&sc->sc_tab); 111 112 /* 113 * Get the sysctl hw.acpi node. This should already be created 114 * but I don't see an easy way to get at it. If this fails, 115 * something is seriously wrong, so let's stop here. 116 */ 117 error = sysctl_createv(&sc->sc_sysctllog, 0, 118 NULL, &sysctl_hw_acpi, 0, 119 CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, 120 CTL_HW, CTL_CREATE, CTL_EOL); 121 if (error) { 122 aprint_error_dev(sc->sc_dev, 123 "failed to create sysctl hw.acpi: %d\n", error); 124 return; 125 } 126 127 /* 128 * Create sysctl hw.acpi.apei. 129 */ 130 error = sysctl_createv(&sc->sc_sysctllog, 0, 131 &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, 132 CTLTYPE_NODE, "apei", 133 SYSCTL_DESCR("ACPI Platform Error Interface"), 134 NULL, 0, NULL, 0, 135 CTL_CREATE, CTL_EOL); 136 if (error) { 137 aprint_error_dev(sc->sc_dev, 138 "failed to create sysctl hw.acpi.apei: %d\n", error); 139 return; 140 } 141 142 /* 143 * Set up BERT, EINJ, ERST, and HEST. 144 */ 145 if (sc->sc_tab.bert) { 146 apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); 147 apei_bert_attach(sc); 148 } 149 if (sc->sc_tab.einj) { 150 apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); 151 apei_einj_attach(sc); 152 } 153 if (sc->sc_tab.erst) { 154 apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); 155 apei_erst_attach(sc); 156 } 157 if (sc->sc_tab.hest) { 158 apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); 159 apei_hest_attach(sc); 160 } 161 } 162 163 static int 164 apei_detach(device_t self, int flags) 165 { 166 struct apei_softc *sc = device_private(self); 167 int error; 168 169 /* 170 * Detach children. We don't currently have any but this is 171 * harmless without children and mandatory if we ever sprouted 172 * them, so let's just leave it here for good measure. 173 * 174 * After this point, we are committed to detaching; failure is 175 * forbidden. 176 */ 177 error = config_detach_children(self, flags); 178 if (error) 179 return error; 180 181 /* 182 * Tear down all the sysctl nodes first, before the software 183 * state backing them goes away. 184 */ 185 sysctl_teardown(&sc->sc_sysctllog); 186 sc->sc_sysctlroot = NULL; 187 188 /* 189 * Detach the software state for the APEI tables. 190 */ 191 if (sc->sc_tab.hest) 192 apei_hest_detach(sc); 193 if (sc->sc_tab.erst) 194 apei_erst_detach(sc); 195 if (sc->sc_tab.einj) 196 apei_einj_detach(sc); 197 if (sc->sc_tab.bert) 198 apei_bert_detach(sc); 199 200 /* 201 * Release the APEI tables and we're done. 202 */ 203 apei_put_tables(&sc->sc_tab); 204 pmf_device_deregister(self); 205 return 0; 206 } 207 208 /* 209 * apei_get_tables(tab) 210 * 211 * Get references to whichever APEI-related tables -- BERT, EINJ, 212 * ERST, HEST -- are available in the system. 213 */ 214 static void 215 apei_get_tables(struct apei_tab *tab) 216 { 217 ACPI_STATUS rv; 218 219 /* 220 * Probe the BERT -- Boot Error Record Table. 221 */ 222 rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); 223 if (ACPI_FAILURE(rv)) 224 tab->bert = NULL; 225 226 /* 227 * Probe the EINJ -- Error Injection Table. 228 */ 229 rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); 230 if (ACPI_FAILURE(rv)) 231 tab->einj = NULL; 232 233 /* 234 * Probe the ERST -- Error Record Serialization Table. 235 */ 236 rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); 237 if (ACPI_FAILURE(rv)) 238 tab->erst = NULL; 239 240 /* 241 * Probe the HEST -- Hardware Error Source Table. 242 */ 243 rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); 244 if (ACPI_FAILURE(rv)) 245 tab->hest = NULL; 246 } 247 248 /* 249 * apei_put_tables(tab) 250 * 251 * Release the tables acquired by apei_get_tables. 252 */ 253 static void 254 apei_put_tables(struct apei_tab *tab) 255 { 256 257 if (tab->bert != NULL) { 258 AcpiPutTable(&tab->bert->Header); 259 tab->bert = NULL; 260 } 261 if (tab->einj != NULL) { 262 AcpiPutTable(&tab->einj->Header); 263 tab->einj = NULL; 264 } 265 if (tab->erst != NULL) { 266 AcpiPutTable(&tab->erst->Header); 267 tab->erst = NULL; 268 } 269 if (tab->hest != NULL) { 270 AcpiPutTable(&tab->hest->Header); 271 tab->hest = NULL; 272 } 273 } 274 275 /* 276 * apei_identify(sc, name, header) 277 * 278 * Identify the APEI-related table header for dmesg. 279 */ 280 static void 281 apei_identify(struct apei_softc *sc, const char *name, 282 const ACPI_TABLE_HEADER *h) 283 { 284 285 aprint_normal_dev(sc->sc_dev, "%s:" 286 " OemId <%6.6s,%8.8s,%08x>" 287 " AslId <%4.4s,%08x>\n", 288 name, 289 h->OemId, h->OemTableId, h->OemRevision, 290 h->AslCompilerId, h->AslCompilerRevision); 291 } 292 293 /* 294 * apei_cper_guid_dec(buf, uuid) 295 * 296 * Decode a Common Platform Error Record UUID/GUID from an ACPI 297 * table at buf into a sys/uuid.h struct uuid. 298 */ 299 static void 300 apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) 301 { 302 303 uuid_dec_le(buf, uuid); 304 } 305 306 /* 307 * apei_format_guid(uuid, s) 308 * 309 * Format a UUID as a string. This uses C initializer notation, 310 * not UUID notation, in order to match the text in the UEFI 311 * specification. 312 */ 313 static void 314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69]) 315 { 316 317 snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x," 318 "{0x%02x,%02x," 319 "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}", 320 uuid->time_low, uuid->time_mid, uuid->time_hi_and_version, 321 uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low, 322 uuid->node[0], uuid->node[1], uuid->node[2], 323 uuid->node[3], uuid->node[4], uuid->node[5]); 324 } 325 326 /* 327 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 328 */ 329 330 static const char *const cper_memory_error_type[] = { 331 #define F(LN, SN, V) [LN] = #SN, 332 CPER_MEMORY_ERROR_TYPES(F) 333 #undef F 334 }; 335 336 /* 337 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block 338 * 339 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this 340 * but are designated as being intended for Generic Error Data Entries 341 * rather than Generic Error Status Blocks. 342 */ 343 static const char *const apei_gesb_severity[] = { 344 [0] = "recoverable", 345 [1] = "fatal", 346 [2] = "corrected", 347 [3] = "none", 348 }; 349 350 /* 351 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry 352 */ 353 static const char *const apei_gede_severity[] = { 354 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable", 355 [ACPI_HEST_GEN_ERROR_FATAL] = "fatal", 356 [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected", 357 [ACPI_HEST_GEN_ERROR_NONE] = "none", 358 }; 359 360 /* 361 * N.2.5. Memory Error Section 362 * 363 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 364 */ 365 static const struct uuid CPER_MEMORY_ERROR_SECTION = 366 {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}}; 367 368 static void 369 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf, 370 size_t len, const char *ctx, bool ratelimitok) 371 { 372 const struct cper_memory_error *ME = buf; 373 char bitbuf[1024]; 374 375 /* 376 * If we've hit the rate limit, skip printing the error. 377 */ 378 if (!ratelimitok) 379 goto out; 380 381 snprintb(bitbuf, sizeof(bitbuf), 382 CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits); 383 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); 384 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) { 385 /* 386 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status 387 */ 388 /* XXX define this format somewhere */ 389 snprintb(bitbuf, sizeof(bitbuf), "\177\020" 390 "f\010\010" "ErrorType\0" 391 "=\001" "ERR_INTERNAL\0" 392 "=\004" "ERR_MEM\0" 393 "=\005" "ERR_TLB\0" 394 "=\006" "ERR_CACHE\0" 395 "=\007" "ERR_FUNCTION\0" 396 "=\010" "ERR_SELFTEST\0" 397 "=\011" "ERR_FLOW\0" 398 "=\020" "ERR_BUS\0" 399 "=\021" "ERR_MAP\0" 400 "=\022" "ERR_IMPROPER\0" 401 "=\023" "ERR_UNIMPL\0" 402 "=\024" "ERR_LOL\0" 403 "=\025" "ERR_RESPONSE\0" 404 "=\026" "ERR_PARITY\0" 405 "=\027" "ERR_PROTOCOL\0" 406 "=\030" "ERR_ERROR\0" 407 "=\031" "ERR_TIMEOUT\0" 408 "=\032" "ERR_POISONED\0" 409 "b\020" "AddressError\0" 410 "b\021" "ControlError\0" 411 "b\022" "DataError\0" 412 "b\023" "ResponderDetected\0" 413 "b\024" "RequesterDetected\0" 414 "b\025" "FirstError\0" 415 "b\026" "Overflow\0" 416 "\0", ME->ErrorStatus); 417 device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf); 418 } 419 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) { 420 device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n", 421 ctx, ME->PhysicalAddress); 422 } 423 if (ME->ValidationBits & 424 CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) { 425 device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64 426 "\n", ctx, ME->PhysicalAddressMask); 427 } 428 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) { 429 device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx, 430 ME->Node); 431 } 432 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) { 433 device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx, 434 ME->Card); 435 } 436 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) { 437 device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx, 438 ME->Module); 439 } 440 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) { 441 device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx, 442 ME->Bank); 443 } 444 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) { 445 device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx, 446 ME->Device); 447 } 448 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) { 449 device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx, 450 ME->Row); 451 } 452 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) { 453 device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx, 454 ME->Column); 455 } 456 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) { 457 device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n", 458 ctx, ME->BitPosition); 459 } 460 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) { 461 device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n", 462 ctx, ME->RequestorId); 463 } 464 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) { 465 device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n", 466 ctx, ME->ResponderId); 467 } 468 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) { 469 device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n", 470 ctx, ME->TargetId); 471 } 472 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) { 473 const uint8_t t = ME->MemoryErrorType; 474 const char *n = t < __arraycount(cper_memory_error_type) 475 ? cper_memory_error_type[t] : NULL; 476 477 if (n) { 478 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d" 479 " (%s)\n", ctx, t, n); 480 } else { 481 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n", 482 ctx, t); 483 } 484 } 485 486 out: /* 487 * XXX pass this through to uvm(9) or userland for decisions 488 * like page retirement 489 */ 490 return; 491 } 492 493 /* 494 * N.2.7. PCI Express Error Section 495 * 496 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section 497 */ 498 static const struct uuid CPER_PCIE_ERROR_SECTION = 499 {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}}; 500 501 static const char *const cper_pcie_error_port_type[] = { 502 #define F(LN, SN, V) [LN] = #SN, 503 CPER_PCIE_ERROR_PORT_TYPES(F) 504 #undef F 505 }; 506 507 static void 508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len, 509 const char *ctx, bool ratelimitok) 510 { 511 const struct cper_pcie_error *PE = buf; 512 char bitbuf[1024]; 513 514 /* 515 * If we've hit the rate limit, skip printing the error. 516 */ 517 if (!ratelimitok) 518 goto out; 519 520 snprintb(bitbuf, sizeof(bitbuf), 521 CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits); 522 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); 523 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) { 524 const uint32_t t = PE->PortType; 525 const char *n = t < __arraycount(cper_pcie_error_port_type) 526 ? cper_pcie_error_port_type[t] : NULL; 527 528 if (n) { 529 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32 530 " (%s)\n", ctx, t, n); 531 } else { 532 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n", 533 ctx, t); 534 } 535 } 536 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) { 537 /* XXX BCD */ 538 device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n", 539 ctx, PE->Version); 540 } 541 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) { 542 /* XXX move me to pcireg.h */ 543 snprintb(bitbuf, sizeof(bitbuf), "\177\020" 544 /* command */ 545 "b\000" "IO_ENABLE\0" 546 "b\001" "MEM_ENABLE\0" 547 "b\002" "MASTER_ENABLE\0" 548 "b\003" "SPECIAL_ENABLE\0" 549 "b\004" "INVALIDATE_ENABLE\0" 550 "b\005" "PALETTE_ENABLE\0" 551 "b\006" "PARITY_ENABLE\0" 552 "b\007" "STEPPING_ENABLE\0" 553 "b\010" "SERR_ENABLE\0" 554 "b\011" "BACKTOBACK_ENABLE\0" 555 "b\012" "INTERRUPT_DISABLE\0" 556 /* status */ 557 "b\023" "INT_STATUS\0" 558 "b\024" "CAPLIST_SUPPORT\0" 559 "b\025" "66MHZ_SUPPORT\0" 560 "b\026" "UDF_SUPPORT\0" 561 "b\027" "BACKTOBACK_SUPPORT\0" 562 "b\030" "PARITY_ERROR\0" 563 "f\031\002" "DEVSEL\0" 564 "=\000" "FAST\0" 565 "=\001" "MEDIUM\0" 566 "=\002" "SLOW\0" 567 "b\033" "TARGET_TARGET_ABORT\0" 568 "b\034" "MASTER_TARGET_ABORT\0" 569 "b\035" "MASTER_ABORT\0" 570 "b\036" "SPECIAL_ERROR\0" 571 "b\037" "PARITY_DETECT\0" 572 "\0", PE->CommandStatus); 573 device_printf(sc->sc_dev, "%s: CommandStatus=%s\n", 574 ctx, bitbuf); 575 } 576 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) { 577 device_printf(sc->sc_dev, "%s: DeviceID:" 578 " VendorID=0x%04"PRIx16 579 " DeviceID=0x%04"PRIx16 580 " ClassCode=0x%06"PRIx32 581 " Function=%"PRIu8 582 " Device=%"PRIu8 583 " Segment=%"PRIu16 584 " Bus=%"PRIu8 585 " SecondaryBus=%"PRIu8 586 " Slot=0x%04"PRIx16 587 " Reserved0=0x%02"PRIx8 588 "\n", 589 ctx, 590 le16dec(PE->DeviceID.VendorID), 591 le16dec(PE->DeviceID.DeviceID), 592 (PE->DeviceID.ClassCode[0] | /* le24dec */ 593 ((uint32_t)PE->DeviceID.ClassCode[1] << 8) | 594 ((uint32_t)PE->DeviceID.ClassCode[2] << 16)), 595 PE->DeviceID.Function, PE->DeviceID.Device, 596 le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus, 597 PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot), 598 PE->DeviceID.Reserved0); 599 } 600 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) { 601 device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n", 602 ctx, PE->DeviceSerial); 603 } 604 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) { 605 /* XXX snprintb */ 606 device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32 607 "\n", ctx, PE->BridgeControlStatus); 608 } 609 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) { 610 uint32_t dcsr, dsr; 611 char hex[9*sizeof(PE->CapabilityStructure)/4]; 612 unsigned i; 613 614 /* 615 * Display a hex dump of each 32-bit register in the 616 * PCIe capability structure. 617 */ 618 __CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0); 619 for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) { 620 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ", 621 le32dec(&PE->CapabilityStructure[4*i])); 622 } 623 hex[sizeof(hex) - 1] = '\0'; 624 device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n", 625 ctx, hex); 626 627 /* 628 * If the Device Status Register has any bits set, 629 * highlight it in particular -- these are probably 630 * error bits. 631 */ 632 dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]); 633 dsr = __SHIFTOUT(dcsr, __BITS(31,16)); 634 if (dsr != 0) { 635 /* 636 * XXX move me to pcireg.h; note: high 637 * half of DCSR 638 */ 639 snprintb(bitbuf, sizeof(bitbuf), "\177\020" 640 "b\000" "CORRECTABLE_ERROR\0" 641 "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0" 642 "b\002" "FATAL_ERROR\0" 643 "b\003" "UNSUPPORTED_REQUEST\0" 644 "b\004" "AUX_POWER\0" 645 "b\005" "TRANSACTIONS_PENDING\0" 646 "\0", dsr); 647 device_printf(sc->sc_dev, "%s: PCIe Device Status:" 648 " %s\n", 649 ctx, bitbuf); 650 } 651 } 652 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) { 653 uint32_t uc_status, uc_sev; 654 uint32_t cor_status; 655 uint32_t control; 656 char hex[9*sizeof(PE->AERInfo)/4]; 657 unsigned i; 658 659 /* 660 * Display a hex dump of each 32-bit register in the 661 * PCIe Advanced Error Reporting extended capability 662 * structure. 663 */ 664 __CTASSERT(sizeof(PE->AERInfo) % 4 == 0); 665 for (i = 0; i < sizeof(PE->AERInfo)/4; i++) { 666 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ", 667 le32dec(&PE->AERInfo[4*i])); 668 } 669 hex[sizeof(hex) - 1] = '\0'; 670 device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex); 671 672 /* XXX move me to pcireg.h */ 673 #define PCI_AER_UC_STATUS_FMT "\177\020" \ 674 "b\000" "UNDEFINED\0" \ 675 "b\004" "DL_PROTOCOL_ERROR\0" \ 676 "b\005" "SURPRISE_DOWN_ERROR\0" \ 677 "b\014" "POISONED_TLP\0" \ 678 "b\015" "FC_PROTOCOL_ERROR\0" \ 679 "b\016" "COMPLETION_TIMEOUT\0" \ 680 "b\017" "COMPLETION_ABORT\0" \ 681 "b\020" "UNEXPECTED_COMPLETION\0" \ 682 "b\021" "RECEIVER_OVERFLOW\0" \ 683 "b\022" "MALFORMED_TLP\0" \ 684 "b\023" "ECRC_ERROR\0" \ 685 "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \ 686 "b\025" "ACS_VIOLATION\0" \ 687 "b\026" "INTERNAL_ERROR\0" \ 688 "b\027" "MC_BLOCKED_TLP\0" \ 689 "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \ 690 "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \ 691 "b\032" "POISONTLP_EGRESS_BLOCKED\0" \ 692 "\0" 693 694 /* 695 * If there are any hardware error status bits set, 696 * highlight them in particular, in three groups: 697 * 698 * - uncorrectable fatal (UC_STATUS and UC_SEVERITY) 699 * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY) 700 * - corrected (COR_STATUS) 701 * 702 * And if there are any uncorrectable errors, show 703 * which one was reported first, according to 704 * CAP_CONTROL. 705 */ 706 uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]); 707 uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]); 708 cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]); 709 control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]); 710 711 if (uc_status & uc_sev) { 712 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 713 uc_status & uc_sev); 714 device_printf(sc->sc_dev, "%s:" 715 " AER hardware fatal uncorrectable errors: %s\n", 716 ctx, bitbuf); 717 } 718 if (uc_status & ~uc_sev) { 719 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 720 uc_status & ~uc_sev); 721 device_printf(sc->sc_dev, "%s:" 722 " AER hardware non-fatal uncorrectable errors:" 723 " %s\n", 724 ctx, bitbuf); 725 } 726 if (uc_status) { 727 unsigned first = __SHIFTOUT(control, 728 PCI_AER_FIRST_ERROR_PTR); 729 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT, 730 (uint32_t)1 << first); 731 device_printf(sc->sc_dev, "%s:" 732 " AER hardware first uncorrectable error: %s\n", 733 ctx, bitbuf); 734 } 735 if (cor_status) { 736 /* XXX move me to pcireg.h */ 737 snprintb(bitbuf, sizeof(bitbuf), "\177\020" 738 "b\000" "RECEIVER_ERROR\0" 739 "b\006" "BAD_TLP\0" 740 "b\007" "BAD_DLLP\0" 741 "b\010" "REPLAY_NUM_ROLLOVER\0" 742 "b\014" "REPLAY_TIMER_TIMEOUT\0" 743 "b\015" "ADVISORY_NF_ERROR\0" 744 "b\016" "INTERNAL_ERROR\0" 745 "b\017" "HEADER_LOG_OVERFLOW\0" 746 "\0", cor_status); 747 device_printf(sc->sc_dev, "%s:" 748 " AER hardware corrected error: %s\n", 749 ctx, bitbuf); 750 } 751 } 752 753 out: /* 754 * XXX pass this on to the PCI subsystem to handle 755 */ 756 return; 757 } 758 759 /* 760 * apei_cper_reports 761 * 762 * Table of known Common Platform Error Record types, symbolic 763 * names, minimum data lengths, and functions to report them. 764 * 765 * The section types and corresponding section layouts are listed 766 * at: 767 * 768 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html 769 */ 770 static const struct apei_cper_report { 771 const char *name; 772 const struct uuid *type; 773 size_t minlength; 774 void (*func)(struct apei_softc *, const void *, size_t, const char *, 775 bool); 776 } apei_cper_reports[] = { 777 { "memory", &CPER_MEMORY_ERROR_SECTION, 778 sizeof(struct cper_memory_error), 779 apei_cper_memory_error_report }, 780 { "PCIe", &CPER_PCIE_ERROR_SECTION, 781 sizeof(struct cper_pcie_error), 782 apei_cper_pcie_error_report }, 783 }; 784 785 /* 786 * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report) 787 * 788 * Report the header of the ith Generic Error Data Entry in the 789 * given context, if ratelimitok is true. 790 * 791 * Return the actual length of the header in headerlen, or 0 if 792 * not known because the revision isn't recognized. 793 * 794 * Return the report type in report, or NULL if not known because 795 * the section type isn't recognized. 796 */ 797 static void 798 apei_gede_report_header(struct apei_softc *sc, 799 const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok, 800 size_t *headerlenp, const struct apei_cper_report **reportp) 801 { 802 const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede; 803 struct uuid sectype; 804 char guidstr[69]; 805 char buf[128]; 806 unsigned i; 807 808 /* 809 * Print the section type as a C initializer. It would be 810 * prettier to use standard hyphenated UUID notation, but that 811 * notation is slightly ambiguous here (two octets could be 812 * written either way, depending on Microsoft convention -- 813 * which influenced ACPI and UEFI -- or internet convention), 814 * and the UEFI spec writes the C initializer notation, so this 815 * makes it easier to search for. 816 * 817 * Also print out a symbolic name, if we know it. 818 */ 819 apei_cper_guid_dec(gede->SectionType, §ype); 820 apei_format_guid(§ype, guidstr); 821 for (i = 0; i < __arraycount(apei_cper_reports); i++) { 822 const struct apei_cper_report *const report = 823 &apei_cper_reports[i]; 824 825 if (memcmp(§ype, report->type, sizeof(sectype)) != 0) 826 continue; 827 if (ratelimitok) { 828 device_printf(sc->sc_dev, "%s:" 829 " SectionType=%s (%s error)\n", 830 ctx, guidstr, report->name); 831 } 832 *reportp = report; 833 break; 834 } 835 if (i == __arraycount(apei_cper_reports)) { 836 if (ratelimitok) { 837 device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx, 838 guidstr); 839 } 840 *reportp = NULL; 841 } 842 843 /* 844 * Print the numeric severity and, if we have it, a symbolic 845 * name for it. 846 */ 847 if (ratelimitok) { 848 device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", 849 ctx, 850 gede->ErrorSeverity, 851 (gede->ErrorSeverity < __arraycount(apei_gede_severity) 852 ? apei_gede_severity[gede->ErrorSeverity] 853 : "unknown")); 854 } 855 856 /* 857 * The Revision may not often be useful, but this is only ever 858 * shown at the time of a hardware error report, not something 859 * you can glean at your convenience with acpidump. So print 860 * it anyway. 861 */ 862 if (ratelimitok) { 863 device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx, 864 gede->Revision); 865 } 866 867 /* 868 * Don't touch anything past the Revision until we've 869 * determined we understand it. Return the header length to 870 * the caller, or return zero -- and stop here -- if we don't 871 * know what the actual header length is. 872 */ 873 if (gede->Revision < 0x0300) { 874 *headerlenp = sizeof(*gede); 875 } else if (gede->Revision < 0x0400) { 876 *headerlenp = sizeof(*gede_v3); 877 } else { 878 *headerlenp = 0; 879 return; 880 } 881 882 /* 883 * Print the validation bits at debug level. Only really 884 * helpful if there are bits we _don't_ know about. 885 */ 886 if (ratelimitok) { 887 /* XXX define this format somewhere */ 888 snprintb(buf, sizeof(buf), "\177\020" 889 "b\000" "FRU_ID\0" 890 "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */ 891 "b\002" "TIMESTAMP\0" 892 "\0", gede->ValidationBits); 893 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, 894 buf); 895 } 896 897 /* 898 * Print the CPER section flags. 899 */ 900 if (ratelimitok) { 901 snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, 902 gede->Flags); 903 device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf); 904 } 905 906 /* 907 * The ErrorDataLength is unlikely to be useful for the log, so 908 * print it at debug level only. 909 */ 910 if (ratelimitok) { 911 aprint_debug_dev(sc->sc_dev, "%s:" 912 " ErrorDataLength=0x%"PRIu32"\n", 913 ctx, gede->ErrorDataLength); 914 } 915 916 /* 917 * Print the FRU Id and text, if available. 918 */ 919 if (ratelimitok && 920 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) { 921 struct uuid fruid; 922 923 apei_cper_guid_dec(gede->FruId, &fruid); 924 apei_format_guid(&fruid, guidstr); 925 device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr); 926 } 927 if (ratelimitok && 928 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) { 929 device_printf(sc->sc_dev, "%s: FruText=%.20s\n", 930 ctx, gede->FruText); 931 } 932 933 /* 934 * Print the timestamp, if available by the revision number and 935 * the validation bits. 936 */ 937 if (ratelimitok && 938 gede->Revision >= 0x0300 && gede->Revision < 0x0400 && 939 gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 940 const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp; 941 const uint8_t s = t[0]; 942 const uint8_t m = t[1]; 943 const uint8_t h = t[2]; 944 const uint8_t f = t[3]; 945 const uint8_t D = t[4]; 946 const uint8_t M = t[5]; 947 const uint8_t Y = t[6]; 948 const uint8_t C = t[7]; 949 950 device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64 951 " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n", 952 ctx, gede_v3->TimeStamp, 953 C,Y, M, D, h,m,s, 954 f & __BIT(0) ? " (event time)" : " (collect time)"); 955 } 956 } 957 958 /* 959 * apei_gesb_ratelimit 960 * 961 * State to limit the rate of console log messages about hardware 962 * errors. For each of the four severity levels in a Generic 963 * Error Status Block, 964 * 965 * 0 - Recoverable (uncorrectable), 966 * 1 - Fatal (uncorrectable), 967 * 2 - Corrected, and 968 * 3 - None (including ill-formed errors), 969 * 970 * we record the last time it happened, protected by a CPU simple 971 * lock that we only try-acquire so it is safe to use in any 972 * context, including non-maskable interrupt context. 973 */ 974 975 static struct { 976 __cpu_simple_lock_t lock; 977 struct timeval lasttime; 978 volatile uint32_t suppressed; 979 } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = { 980 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED }, 981 [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED }, 982 [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED }, 983 [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED }, 984 }; 985 986 static void 987 atomic_incsat_32(volatile uint32_t *p) 988 { 989 uint32_t o, n; 990 991 do { 992 o = atomic_load_relaxed(p); 993 if (__predict_false(o == UINT_MAX)) 994 return; 995 n = o + 1; 996 } while (__predict_false(atomic_cas_32(p, o, n) != o)); 997 } 998 999 /* 1000 * apei_gesb_ratecheck(sc, severity, suppressed) 1001 * 1002 * Check for a rate limit on errors of the specified severity. 1003 * 1004 * => Return true if the error should be printed, and format into 1005 * the buffer suppressed a message saying how many errors were 1006 * previously suppressed. 1007 * 1008 * => Return false if the error should be suppressed because the 1009 * last one printed was too recent. 1010 */ 1011 static bool 1012 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity, 1013 char suppressed[static sizeof(" (4294967295 or more errors suppressed)")]) 1014 { 1015 /* one of each type per minute (XXX worth making configurable?) */ 1016 const struct timeval mininterval = {60, 0}; 1017 unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */ 1018 bool ok = false; 1019 1020 /* 1021 * If the lock is contended, the rate limit is probably 1022 * exceeded, so it's not OK to print. 1023 * 1024 * Otherwise, with the lock held, ask ratecheck(9) whether it's 1025 * OK to print. 1026 */ 1027 if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock)) 1028 goto out; 1029 ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval); 1030 __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock); 1031 1032 out: /* 1033 * If it's OK to print, report the number of errors that were 1034 * suppressed. If it's not OK to print, count a suppressed 1035 * error. 1036 */ 1037 if (ok) { 1038 const uint32_t n = 1039 atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0); 1040 1041 if (n == 0) { 1042 suppressed[0] = '\0'; 1043 } else { 1044 snprintf(suppressed, 1045 sizeof(" (4294967295 or more errors suppressed)"), 1046 " (%u%s error%s suppressed)", 1047 n, 1048 n == UINT32_MAX ? " or more" : "", 1049 n == 1 ? "" : "s"); 1050 } 1051 } else { 1052 atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed); 1053 suppressed[0] = '\0'; 1054 } 1055 return ok; 1056 } 1057 1058 /* 1059 * apei_gesb_report(sc, gesb, size, ctx) 1060 * 1061 * Check a Generic Error Status Block, of at most the specified 1062 * size in bytes, and report any errors in it. Return the 32-bit 1063 * Block Status in case the caller needs it to acknowledge the 1064 * report to firmware. 1065 */ 1066 uint32_t 1067 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb, 1068 size_t size, const char *ctx, bool *fatalp) 1069 { 1070 uint32_t status, unknownstatus, severity, nentries, i; 1071 uint32_t datalen, rawdatalen; 1072 const ACPI_HEST_GENERIC_DATA *gede0, *gede; 1073 const unsigned char *rawdata; 1074 bool ratelimitok = false; 1075 char suppressed[sizeof(" (4294967295 or more errors suppressed)")]; 1076 bool fatal = false; 1077 1078 /* 1079 * Verify the buffer is large enough for a Generic Error Status 1080 * Block before we try to touch anything in it. 1081 */ 1082 if (size < sizeof(*gesb)) { 1083 ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE, 1084 suppressed); 1085 if (ratelimitok) { 1086 device_printf(sc->sc_dev, 1087 "%s: truncated GESB, %zu < %zu%s\n", 1088 ctx, size, sizeof(*gesb), suppressed); 1089 } 1090 status = 0; 1091 goto out; 1092 } 1093 size -= sizeof(*gesb); 1094 1095 /* 1096 * Load the status. Access ordering rules are unclear in the 1097 * ACPI specification; I'm guessing that load-acquire of the 1098 * block status is a good idea before any other access to the 1099 * GESB. 1100 */ 1101 status = atomic_load_acquire(&gesb->BlockStatus); 1102 1103 /* 1104 * If there are no status bits set, the rest of the GESB is 1105 * garbage, so stop here. 1106 */ 1107 if (status == 0) { 1108 /* XXX dtrace */ 1109 /* XXX DPRINTF */ 1110 goto out; 1111 } 1112 1113 /* 1114 * Read out the severity and get the number of entries in this 1115 * status block. 1116 */ 1117 severity = gesb->ErrorSeverity; 1118 nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT); 1119 1120 /* 1121 * Print a message to the console and dmesg about the severity 1122 * of the error. 1123 */ 1124 ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed); 1125 if (ratelimitok) { 1126 char statusbuf[128]; 1127 1128 /* XXX define this format somewhere */ 1129 snprintb(statusbuf, sizeof(statusbuf), "\177\020" 1130 "b\000" "UE\0" 1131 "b\001" "CE\0" 1132 "b\002" "MULTI_UE\0" 1133 "b\003" "MULTI_CE\0" 1134 "f\004\010" "GEDE_COUNT\0" 1135 "\0", status); 1136 1137 if (severity < __arraycount(apei_gesb_severity)) { 1138 device_printf(sc->sc_dev, "%s" 1139 " reported hardware error%s:" 1140 " severity=%s nentries=%u status=%s\n", 1141 ctx, suppressed, 1142 apei_gesb_severity[severity], nentries, statusbuf); 1143 } else { 1144 device_printf(sc->sc_dev, "%s reported error%s:" 1145 " severity=%"PRIu32" nentries=%u status=%s\n", 1146 ctx, suppressed, 1147 severity, nentries, statusbuf); 1148 } 1149 } 1150 1151 /* 1152 * Make a determination about whether the error is fatal. 1153 * 1154 * XXX Currently we don't have any mechanism to recover from 1155 * uncorrectable but recoverable errors, so we treat those -- 1156 * and anything else we don't recognize -- as fatal. 1157 */ 1158 switch (severity) { 1159 case ACPI_HEST_GEN_ERROR_CORRECTED: 1160 case ACPI_HEST_GEN_ERROR_NONE: 1161 fatal = false; 1162 break; 1163 case ACPI_HEST_GEN_ERROR_FATAL: 1164 case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */ 1165 default: 1166 fatal = true; 1167 break; 1168 } 1169 1170 /* 1171 * Clear the bits we know about to warn if there's anything 1172 * left we don't understand. 1173 */ 1174 unknownstatus = status; 1175 unknownstatus &= ~ACPI_HEST_UNCORRECTABLE; 1176 unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE; 1177 unknownstatus &= ~ACPI_HEST_CORRECTABLE; 1178 unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE; 1179 unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT; 1180 if (ratelimitok && unknownstatus != 0) { 1181 /* XXX dtrace */ 1182 device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:" 1183 " 0x%"PRIx32"\n", ctx, unknownstatus); 1184 } 1185 1186 /* 1187 * Advance past the Generic Error Status Block (GESB) header to 1188 * the Generic Error Data Entries (GEDEs). 1189 */ 1190 gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1); 1191 1192 /* 1193 * Verify that the data length (GEDEs) fits within the size. 1194 * If not, truncate the GEDEs. 1195 */ 1196 datalen = gesb->DataLength; 1197 if (size < datalen) { 1198 if (ratelimitok) { 1199 device_printf(sc->sc_dev, "%s:" 1200 " GESB DataLength exceeds bounds:" 1201 " %zu < %"PRIu32"\n", 1202 ctx, size, datalen); 1203 } 1204 datalen = size; 1205 } 1206 size -= datalen; 1207 1208 /* 1209 * Report each of the Generic Error Data Entries. 1210 */ 1211 for (i = 0; i < nentries; i++) { 1212 size_t headerlen; 1213 const struct apei_cper_report *report; 1214 char subctx[128]; 1215 1216 /* 1217 * Format a subcontext to show this numbered entry of 1218 * the GESB. 1219 */ 1220 snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i); 1221 1222 /* 1223 * If the remaining GESB data length isn't enough for a 1224 * GEDE header, stop here. 1225 */ 1226 if (datalen < sizeof(*gede)) { 1227 if (ratelimitok) { 1228 device_printf(sc->sc_dev, "%s:" 1229 " truncated GEDE: %"PRIu32" < %zu bytes\n", 1230 subctx, datalen, sizeof(*gede)); 1231 } 1232 break; 1233 } 1234 1235 /* 1236 * Print the GEDE header and get the full length (may 1237 * vary from revision to revision of the GEDE) and the 1238 * CPER report function if possible. 1239 */ 1240 apei_gede_report_header(sc, gede, subctx, ratelimitok, 1241 &headerlen, &report); 1242 1243 /* 1244 * If we don't know the header length because of an 1245 * unfamiliar revision, stop here. 1246 */ 1247 if (headerlen == 0) { 1248 if (ratelimitok) { 1249 device_printf(sc->sc_dev, "%s:" 1250 " unknown revision: 0x%"PRIx16"\n", 1251 subctx, gede->Revision); 1252 } 1253 break; 1254 } 1255 1256 /* 1257 * Stop here if what we mapped is too small for the 1258 * error data length. 1259 */ 1260 datalen -= headerlen; 1261 if (datalen < gede->ErrorDataLength) { 1262 if (ratelimitok) { 1263 device_printf(sc->sc_dev, "%s:" 1264 " truncated GEDE payload:" 1265 " %"PRIu32" < %"PRIu32" bytes\n", 1266 subctx, datalen, gede->ErrorDataLength); 1267 } 1268 break; 1269 } 1270 1271 /* 1272 * Report the Common Platform Error Record appendix to 1273 * this Generic Error Data Entry. 1274 */ 1275 if (report == NULL) { 1276 if (ratelimitok) { 1277 device_printf(sc->sc_dev, "%s:" 1278 " [unknown type]\n", ctx); 1279 } 1280 } else { 1281 /* XXX pass ratelimit through */ 1282 (*report->func)(sc, (const char *)gede + headerlen, 1283 gede->ErrorDataLength, subctx, ratelimitok); 1284 } 1285 1286 /* 1287 * Advance past the GEDE header and CPER data to the 1288 * next GEDE. 1289 */ 1290 gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede + 1291 + headerlen + gede->ErrorDataLength); 1292 } 1293 1294 /* 1295 * Advance past the Generic Error Data Entries (GEDEs) to the 1296 * raw error data. 1297 * 1298 * XXX Provide Max Raw Data Length as a parameter, as found in 1299 * various HEST entry types. 1300 */ 1301 rawdata = (const unsigned char *)gede0 + datalen; 1302 1303 /* 1304 * Verify that the raw data length fits within the size. If 1305 * not, truncate the raw data. 1306 */ 1307 rawdatalen = gesb->RawDataLength; 1308 if (size < rawdatalen) { 1309 if (ratelimitok) { 1310 device_printf(sc->sc_dev, "%s:" 1311 " GESB RawDataLength exceeds bounds:" 1312 " %zu < %"PRIu32"\n", 1313 ctx, size, rawdatalen); 1314 } 1315 rawdatalen = size; 1316 } 1317 size -= rawdatalen; 1318 1319 /* 1320 * Hexdump the raw data, if any. 1321 */ 1322 if (ratelimitok && rawdatalen > 0) { 1323 char devctx[128]; 1324 1325 snprintf(devctx, sizeof(devctx), "%s: %s: raw data", 1326 device_xname(sc->sc_dev), ctx); 1327 hexdump(printf, devctx, rawdata, rawdatalen); 1328 } 1329 1330 /* 1331 * If there's anything left after the raw data, warn. 1332 */ 1333 if (ratelimitok && size > 0) { 1334 device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n", 1335 ctx, size); 1336 } 1337 1338 /* 1339 * Return the status so the caller can ack it, and tell the 1340 * caller whether this error is fatal. 1341 */ 1342 out: *fatalp = fatal; 1343 return status; 1344 } 1345 1346 MODULE(MODULE_CLASS_DRIVER, apei, NULL); 1347 1348 #ifdef _MODULE 1349 #include "ioconf.c" 1350 #endif 1351 1352 static int 1353 apei_modcmd(modcmd_t cmd, void *opaque) 1354 { 1355 int error = 0; 1356 1357 switch (cmd) { 1358 case MODULE_CMD_INIT: 1359 #ifdef _MODULE 1360 error = config_init_component(cfdriver_ioconf_apei, 1361 cfattach_ioconf_apei, cfdata_ioconf_apei); 1362 #endif 1363 return error; 1364 case MODULE_CMD_FINI: 1365 #ifdef _MODULE 1366 error = config_fini_component(cfdriver_ioconf_apei, 1367 cfattach_ioconf_apei, cfdata_ioconf_apei); 1368 #endif 1369 return error; 1370 default: 1371 return ENOTTY; 1372 } 1373 } 1374