xref: /netbsd-src/sys/dev/acpi/apei_cper.h (revision 70f84647259b895b87903c90de73a5800fe70c23)
1 /*	$NetBSD: apei_cper.h,v 1.5 2024/10/27 12:59:08 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * UEFI Common Platform Error Record
31  *
32  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
33  */
34 
35 #ifndef	_SYS_DEV_ACPI_APEI_CPER_H_
36 #define	_SYS_DEV_ACPI_APEI_CPER_H_
37 
38 #include <sys/types.h>
39 
40 #include <sys/cdefs.h>
41 
42 /*
43  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#record-header
44  */
45 struct cper_header {
46 	char		SignatureStart[4];	/* `CPER' */
47 	uint16_t	Revision;
48 	uint32_t	SignatureEnd;		/* 0xffffffff */
49 	uint16_t	SectionCount;
50 	uint32_t	ErrorSeverity;
51 	uint32_t	ValidationBits;
52 	uint32_t	RecordLength;
53 	uint64_t	Timestamp;
54 	uint8_t		PlatformId[16];
55 	uint8_t		PartitionId[16];
56 	uint8_t		CreatorId[16];
57 	uint8_t		NotificationType[16];
58 	uint64_t	RecordId;
59 	uint32_t	Flags;
60 	uint64_t	PersistenceInfo;
61 	uint8_t		Reserved[12];
62 } __packed;
63 __CTASSERT(sizeof(struct cper_header) == 128);
64 
/*
 * Values of struct cper_header::ErrorSeverity.
 *
 * The numbering is not ordered by severity: FATAL (1) sorts below
 * CORRECTED (2), so compare against the named constants rather than
 * by magnitude.
 */
enum {
	CPER_ERROR_SEVERITY_RECOVERABLE = 0,
	CPER_ERROR_SEVERITY_FATAL = 1,
	CPER_ERROR_SEVERITY_CORRECTED = 2,
	CPER_ERROR_SEVERITY_INFORMATIONAL = 3,
};
71 
72 enum {				/* struct cper_header::ValidationBits */
73 	CPER_VALID_PLATFORM_ID		= __BIT(0),
74 	CPER_VALID_TIMESTAMP		= __BIT(1),
75 	CPER_VALID_PARTITION_ID		= __BIT(2),
76 };
77 
78 /*
79  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-record-header-flags
80  */
81 enum {				/* struct cper_header::Flags */
82 	CPER_HW_ERROR_FLAG_RECOVERED	= __BIT(0),
83 	CPER_HW_ERROR_FLAG_PREVERR	= __BIT(1),
84 	CPER_HW_ERROR_FLAG_SIMULATED	= __BIT(2),
85 };
86 
87 /*
88  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#section-descriptor-format
89  */
90 enum {
91 	CPER_SECTION_FLAG_PRIMARY			= __BIT(0),
92 	CPER_SECTION_FLAG_CONTAINMENT_WARNING		= __BIT(1),
93 	CPER_SECTION_FLAG_RESET				= __BIT(2),
94 	CPER_SECTION_FLAG_ERROR_THRESHOLD_EXCEEDED	= __BIT(3),
95 	CPER_SECTION_FLAG_RESOURCE_NOT_ACCESSIBLE	= __BIT(4),
96 	CPER_SECTION_FLAG_LATENT_ERROR			= __BIT(5),
97 	CPER_SECTION_FLAG_PROPAGATED			= __BIT(6),
98 	CPER_SECTION_FLAG_OVERFLOW			= __BIT(7),
99 };
100 
/*
 * snprintb(3) format naming the CPER_SECTION_FLAG_* bits.
 *
 * "\177" selects the new-style format and "\020" (16) the numeric base
 * used for the raw value.  Each following entry is the letter `b', one
 * byte holding the bit position (written as a three-digit octal
 * escape), and the NUL-terminated name of that bit.
 */
#define	CPER_SECTION_FLAGS_FMT						      \
	"\177\020"							      \
	"b\000PRIMARY\0"						      \
	"b\001CONTAINMENT_WARNING\0"					      \
	"b\002RESET\0"							      \
	"b\003ERROR_THRESHOLD_EXCEEDED\0"				      \
	"b\004RESOURCE_NOT_ACCESSIBLE\0"				      \
	"b\005LATENT_ERROR\0"						      \
	"b\006PROPAGATED\0"						      \
	"b\007OVERFLOW\0"						      \
	"\0"
111 
112 /*
113  * N.2.5. Memory Error Section
114  *
115  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
116  *
117  * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb1}}
118  */
119 
120 struct cper_memory_error {
121 	uint64_t	ValidationBits;
122 	uint64_t	ErrorStatus;
123 	uint64_t	PhysicalAddress;
124 	uint64_t	PhysicalAddressMask;
125 	uint16_t	Node;
126 	uint16_t	Card;
127 	uint16_t	Module;
128 	uint16_t	Bank;
129 	uint16_t	Device;
130 	uint16_t	Row;
131 	uint16_t	Column;
132 	uint16_t	BitPosition;
133 	uint64_t	RequestorId;
134 	uint64_t	ResponderId;
135 	uint64_t	TargetId;
136 	uint8_t		MemoryErrorType;
137 } __packed;
138 __CTASSERT(sizeof(struct cper_memory_error) == 73);
139 
140 struct cper_memory_error_ext {
141 	struct cper_memory_error	Base;
142 	uint8_t		Extended;
143 	uint16_t	RankNumber;
144 	uint16_t	CardHandle;
145 	uint16_t	ModuleHandle;
146 } __packed;
147 __CTASSERT(sizeof(struct cper_memory_error_ext) == 80);
148 
149 enum {				/* struct cper_memory_error::ValidationBits */
150 	CPER_MEMORY_ERROR_VALID_ERROR_STATUS		= __BIT(0),
151 	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS	= __BIT(1),
152 	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK	= __BIT(2),
153 	CPER_MEMORY_ERROR_VALID_NODE			= __BIT(3),
154 	CPER_MEMORY_ERROR_VALID_CARD			= __BIT(4),
155 	CPER_MEMORY_ERROR_VALID_MODULE			= __BIT(5),
156 	CPER_MEMORY_ERROR_VALID_BANK			= __BIT(6),
157 	CPER_MEMORY_ERROR_VALID_DEVICE			= __BIT(7),
158 	CPER_MEMORY_ERROR_VALID_ROW			= __BIT(8),
159 	CPER_MEMORY_ERROR_VALID_COLUMN			= __BIT(9),
160 	CPER_MEMORY_ERROR_VALID_BIT_POSITION		= __BIT(10),
161 	CPER_MEMORY_ERROR_VALID_REQUESTOR_ID		= __BIT(11),
162 	CPER_MEMORY_ERROR_VALID_RESPONDER_ID		= __BIT(12),
163 	CPER_MEMORY_ERROR_VALID_TARGET_ID		= __BIT(13),
164 	CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE	= __BIT(14),
165 	CPER_MEMORY_ERROR_VALID_RANK_NUMBER		= __BIT(15),
166 	CPER_MEMORY_ERROR_VALID_CARD_HANDLE		= __BIT(16),
167 	CPER_MEMORY_ERROR_VALID_MODULE_HANDLE		= __BIT(17),
168 	CPER_MEMORY_ERROR_VALID_EXTENDED_ROW		= __BIT(18),
169 	CPER_MEMORY_ERROR_VALID_BANK_GROUP		= __BIT(19),
170 	CPER_MEMORY_ERROR_VALID_BANK_ADDRESS		= __BIT(20),
171 	CPER_MEMORY_ERROR_VALID_CHIP_ID			= __BIT(21),
172 };
173 
/*
 * snprintb(3) format naming the CPER_MEMORY_ERROR_VALID_* bits.
 *
 * "\177\020" is the new-style header with base 16; every entry after it
 * is `b', a one-byte bit position (three-digit octal escape), and the
 * NUL-terminated bit name.
 */
#define	CPER_MEMORY_ERROR_VALIDATION_BITS_FMT				      \
	"\177\020"							      \
	"b\000ERROR_STATUS\0"						      \
	"b\001PHYSICAL_ADDRESS\0"					      \
	"b\002PHYSICAL_ADDRESS_MASK\0"					      \
	"b\003NODE\0"							      \
	"b\004CARD\0"							      \
	"b\005MODULE\0"							      \
	"b\006BANK\0"							      \
	"b\007DEVICE\0"							      \
	"b\010ROW\0"							      \
	"b\011COLUMN\0"							      \
	"b\012BIT_POSITION\0"						      \
	"b\013REQUESTOR_ID\0"						      \
	"b\014RESPONDER_ID\0"						      \
	"b\015TARGET_ID\0"						      \
	"b\016MEMORY_ERROR_TYPE\0"					      \
	"b\017RANK_NUMBER\0"						      \
	"b\020CARD_HANDLE\0"						      \
	"b\021MODULE_HANDLE\0"						      \
	"b\022EXTENDED_ROW\0"						      \
	"b\023BANK_GROUP\0"						      \
	"b\024BANK_ADDRESS\0"						      \
	"b\025CHIP_ID\0"						      \
	"\0"
198 
199 enum {				/* struct cper_memory_error::Bank */
200 	CPER_MEMORY_ERROR_BANK_ADDRESS	= __BITS(7,0),
201 	CPER_MEMORY_ERROR_BANK_GROUP	= __BITS(15,8),
202 };
203 
/*
 * Table of memory error types, written as an X-macro.  The caller
 * supplies F, which is applied once per row as
 *
 *	F(long enumerator name, short name, numeric value)
 *
 * Rows are listed in ascending value order, 0 through 15.
 */
#define	CPER_MEMORY_ERROR_TYPES(F)					      \
	F(CPER_MEMORY_ERROR_UNKNOWN, UNKNOWN, 0)			      \
	F(CPER_MEMORY_ERROR_NO_ERROR, NO_ERROR, 1)			      \
	F(CPER_MEMORY_ERROR_SINGLEBIT_ECC, SINGLEBIT_ECC, 2)		      \
	F(CPER_MEMORY_ERROR_MULTIBIT_ECC, MULTIBIT_ECC, 3)		      \
	F(CPER_MEMORY_ERROR_SINGLESYM_CHIPKILL_ECC,			      \
	    SINGLESYM_CHIPKILL_ECC, 4)					      \
	F(CPER_MEMORY_ERROR_MULTISYM_CHIPKILL_ECC,			      \
	    MULTISYM_CHIPKILL_ECC, 5)					      \
	F(CPER_MEMORY_ERROR_MASTER_ABORT, MASTER_ABORT, 6)		      \
	F(CPER_MEMORY_ERROR_TARGET_ABORT, TARGET_ABORT, 7)		      \
	F(CPER_MEMORY_ERROR_PARITY_ERROR, PARITY_ERROR, 8)		      \
	F(CPER_MEMORY_ERROR_WATCHDOG_TIMEOUT, WATCHDOG_TIMEOUT, 9)	      \
	F(CPER_MEMORY_ERROR_INVALID_ADDRESS, INVALID_ADDRESS, 10)	      \
	F(CPER_MEMORY_ERROR_MIRROR_BROKEN, MIRROR_BROKEN, 11)		      \
	F(CPER_MEMORY_ERROR_MEMORY_SPARING, MEMORY_SPARING, 12)		      \
	F(CPER_MEMORY_ERROR_SCRUB_CORRECTED_ERROR,			      \
	    SCRUB_CORRECTED_ERROR, 13)					      \
	F(CPER_MEMORY_ERROR_SCRUB_UNCORRECTED_ERROR,			      \
	    SCRUB_UNCORRECTED_ERROR, 14)				      \
	F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT,			      \
	    PHYSMEM_MAPOUT_EVENT, 15)					      \
	/* end of CPER_MEMORY_ERROR_TYPES */

/*
 * Values of struct cper_memory_error::MemoryErrorType, generated from
 * the table above: one enumerator per row, using the long name and the
 * numeric value and ignoring the short name.
 */
enum cper_memory_error_type {
#define	CPER_MEMORY_ERROR_TYPE_ENUMERATOR(NAME, SHORT, VALUE)	NAME = VALUE,
	CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_ENUMERATOR)
#undef	CPER_MEMORY_ERROR_TYPE_ENUMERATOR
};
229 
230 enum {				/* struct cper_memory_error_ext::Extended */
231 	CPER_MEMORY_ERROR_EXTENDED_ROWBIT16		= __BIT(0),
232 	CPER_MEMORY_ERROR_EXTENDED_ROWBIT17		= __BIT(1),
233 	CPER_MEMORY_ERROR_EXTENDED_CHIPID		= __BITS(7,5),
234 };
235 
236 /*
237  * N.2.7. PCI Express Error Section
238  *
239  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
240  *
241  * Type: {0xd995e954,0xbbc1,0x430f,{0xad,0x91,0xb4,0x4d,0xcb,0x3c,0x6f,0x35}}
242  */
243 
244 struct cper_pcie_error {
245 	uint64_t	ValidationBits;
246 	uint32_t	PortType;
247 	uint32_t	Version;
248 	uint32_t	CommandStatus;
249 	uint32_t	Reserved0;
250 	struct {
251 		uint8_t		VendorID[2];
252 		uint8_t		DeviceID[2]; /* product */
253 		uint8_t		ClassCode[3];
254 		uint8_t		Function;
255 		uint8_t		Device;
256 		uint8_t		Segment[2];
257 		uint8_t		Bus;
258 		uint8_t		SecondaryBus;
259 		uint8_t		Slot[2]; /* bits 0:2 resv, bits 3:15 slot */
260 		uint8_t		Reserved0;
261 	}		DeviceID;
262 	uint64_t	DeviceSerial;
263 	uint32_t	BridgeControlStatus;
264 	uint8_t		CapabilityStructure[60];
265 	uint8_t		AERInfo[96];
266 } __packed;
267 __CTASSERT(sizeof(struct cper_pcie_error) == 208);
268 
269 enum {				/* struct cper_pcie_error::ValidationBits */
270 	CPER_PCIE_ERROR_VALID_PORT_TYPE			= __BIT(0),
271 	CPER_PCIE_ERROR_VALID_VERSION			= __BIT(1),
272 	CPER_PCIE_ERROR_VALID_COMMAND_STATUS		= __BIT(2),
273 	CPER_PCIE_ERROR_VALID_DEVICE_ID			= __BIT(3),
274 	CPER_PCIE_ERROR_VALID_DEVICE_SERIAL		= __BIT(4),
275 	CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS	= __BIT(5),
276 	CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE	= __BIT(6),
277 	CPER_PCIE_ERROR_VALID_AER_INFO			= __BIT(7),
278 };
279 
/*
 * snprintb(3) format naming the CPER_PCIE_ERROR_VALID_* bits.
 *
 * "\177\020" is the new-style header with base 16; every entry after it
 * is `b', a one-byte bit position (three-digit octal escape), and the
 * NUL-terminated bit name.
 */
#define	CPER_PCIE_ERROR_VALIDATION_BITS_FMT				      \
	"\177\020"							      \
	"b\000PORT_TYPE\0"						      \
	"b\001VERSION\0"						      \
	"b\002COMMAND_STATUS\0"						      \
	"b\003DEVICE_ID\0"						      \
	"b\004DEVICE_SERIAL\0"						      \
	"b\005BRIDGE_CONTROL_STATUS\0"					      \
	"b\006CAPABILITY_STRUCTURE\0"					      \
	"b\007AER_INFO\0"						      \
	"\0"
290 
/*
 * Table of PCI Express port types, written as an X-macro.  The caller
 * supplies F, which is applied once per row as
 *
 *	F(long enumerator name, short name, numeric value)
 *
 * Values 2 and 3 have no row.
 *
 * Value 4 is Root Port and value 5 is Upstream Switch Port; these are
 * two distinct port types.  The row for 4 carries the name
 * ROOTPORT5_UPSTREAMSWITCH, which appears to fuse the two spec entries
 * ("4: Root Port" and "5: Upstream Switch Port").  That name and its
 * value are kept unchanged so existing users keep compiling and keep
 * printing the same string; value 5 has its own UPSTREAMSWITCH row so
 * that upstream switch ports can be named too.
 */
#define	CPER_PCIE_ERROR_PORT_TYPES(F)					      \
	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_ENDPOINT, PCIE_ENDPOINT, 0)	      \
	F(CPER_PCIE_ERROR_PORT_TYPE_LEGACY_PCI_ENDPOINT, LEGACY_PCI_ENDPOINT, \
	    1)								      \
	F(CPER_PCIE_ERROR_PORT_TYPE_ROOTPORT5_UPSTREAMSWITCH,		      \
	    ROOTPORT5_UPSTREAMSWITCH, 4)				      \
		/* Root Port (legacy name, see above) */		      \
	F(CPER_PCIE_ERROR_PORT_TYPE_UPSTREAMSWITCH, UPSTREAMSWITCH, 5)	      \
	F(CPER_PCIE_ERROR_PORT_TYPE_DOWNSTREAMSWITCH, DOWNSTREAMSWITCH, 6)    \
	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_PCI_BRIDGE, PCIE_PCI_BRIDGE, 7)      \
	F(CPER_PCIE_ERROR_PORT_TYPE_PCI_PCIE_BRIDGE, PCI_PCIE_BRIDGE, 8)      \
	F(CPER_PCIE_ERROR_PORT_TYPE_RCIEP_DEV, RCIEP_DEV, 9)		      \
		/* Root Complex Integrated Endpoint Device */		      \
	F(CPER_PCIE_ERROR_PORT_TYPE_RCEC, RCEC, 10)			      \
		/* Root Complex Event Collector */			      \
	/* end of CPER_PCIE_ERROR_PORT_TYPES */

/*
 * Values of struct cper_pcie_error::PortType, generated from the table
 * above: one enumerator per row, using the long name and the numeric
 * value and ignoring the short name.
 */
enum cper_pcie_error_port_type {
#define	CPER_PCIE_ERROR_PORT_TYPE_DEF(LN, SN, V)	LN = V,
	CPER_PCIE_ERROR_PORT_TYPES(CPER_PCIE_ERROR_PORT_TYPE_DEF)
#undef	CPER_PCIE_ERROR_PORT_TYPE_DEF
};
311 
312 #endif	/* _SYS_DEV_ACPI_APEI_CPER_H_ */
313