xref: /netbsd-src/sys/arch/x86/x86/errata.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: errata.c,v 1.27 2021/10/07 12:52:27 msaitoh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Detect, report on, and work around known errata with x86 CPUs.
34  *
35  * This currently only handles AMD CPUs, and is generalised because
36  * there are quite a few problems that the BIOS can patch via MSR,
37  * but it is not known if the OS can patch these yet.  The list is
38  * expected to grow over time.
39  *
40  * The data here are from: Revision Guide for AMD Athlon 64 and
41  * AMD Opteron Processors, Publication #25759, Revision: 3.69,
42  * Issue Date: September 2006
43  *
44  * XXX This should perhaps be integrated with the identcpu code.
45  */
46 
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: errata.c,v 1.27 2021/10/07 12:52:27 msaitoh Exp $");
49 
50 #include <sys/types.h>
51 #include <sys/systm.h>
52 
53 #include <machine/cpu.h>
54 #include <machine/cpufunc.h>
55 #include <machine/specialreg.h>
56 
57 #include <x86/cpuvar.h>
58 #include <x86/cputypes.h>
59 
/*
 * One entry of the erratum table.
 *
 * e_data1 and e_data2 are opaque to the table walker and are interpreted
 * by the e_act handler.  Both handlers in this file treat e_data1 as the
 * MSR number and e_data2 as the bit mask to test (and possibly set) in
 * that MSR.
 */
typedef struct errata {
	/* Erratum number; printed in the diagnostic messages. */
	u_short		e_num;
	/* Non-zero once the erratum has been reported; the entry is then skipped. */
	u_short		e_reported;
	/* Handler argument; the handlers in this file pass it to rdmsr/wrmsr. */
	u_int		e_data1;
	/* OINK-terminated list of affected cpurev_t values; NULL skips the check. */
	const uint8_t	*e_set;
	/*
	 * Handler run when the CPU revision matches.  A TRUE return makes the
	 * caller report the erratum as present; NULL means "always report".
	 */
	bool		(*e_act)(struct cpu_info *, struct errata *);
	/* Handler argument; the handlers in this file use it as an MSR bit mask. */
	uint64_t	e_data2;
} errata_t;
68 
/*
 * Symbolic names for the CPU revisions known to this file, grouped below
 * by name prefix.  The enumerators are implicitly numbered from 0 and are
 * stored in uint8_t arrays (the x86_errata_set* tables), so the order must
 * not change and the count must stay below 256.
 *
 * OINK must remain last: it is the end-of-list sentinel for both the
 * cpurevs table and every x86_errata_set* array.
 */
typedef enum cpurev {
	BH_E4,
	CH_CG, CH_D0,
	DH_CG, DH_D0, DH_E3, DH_E6,
	JH_E1, JH_E6,
	SH_B0, SH_B3, SH_C0, SH_CG, SH_D0, SH_E4, SH_E5,
	DR_BA, DR_B2, DR_B3,
	RB_C2, RB_C3,
	BL_C2, BL_C3,
	DA_C2, DA_C3,
	HY_D0, HY_D1, HY_D1_G34R1,
	PH_E0,
	LN_B0,
	KB_A1,
	ML_A1,
	ZP_B1, ZP_B2,
	PiR_B2,
	OINK		/* sentinel, not a real revision */
} cpurev_t;
76 
77 static const u_int cpurevs[] = {
78 	BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
79 	CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
80 	DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
81 	DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
82 	DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
83 	JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
84 	SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
85 	SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
86 	SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
87 	SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
88 	DR_BA, 0x0100f2a, DR_B2, 0x0100f22, DR_B3, 0x0100f23,
89 	RB_C2, 0x0100f42, RB_C3, 0x0100f43, BL_C2, 0x0100f52,
90 	BL_C3, 0x0100f53, DA_C2, 0x0100f62, DA_C3, 0x0100f63,
91 	HY_D0, 0x0100f80, HY_D1, 0x0100f81, HY_D1_G34R1, 0x0100f91,
92 	PH_E0, 0x0100fa0, LN_B0, 0x0300f10, KB_A1, 0x0700F01,
93 	ML_A1, 0x0730F01, ZP_B1, 0x0800F11, ZP_B2, 0x0800F12,
94 	PiR_B2, 0x0800F82,
95 	OINK
96 };
97 
/* Affected revisions for errata 86 and 94 (OINK-terminated). */
static const uint8_t x86_errata_set1[] = {
	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
};
101 
/* Affected revisions for errata 101, 106 and 107 (OINK-terminated). */
static const uint8_t x86_errata_set2[] = {
	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
};
105 
/* Affected revisions for erratum 113 (OINK-terminated). */
static const uint8_t x86_errata_set3[] = {
	JH_E1, DH_E3, OINK
};
109 
/* Affected revisions for erratum 122 (OINK-terminated). */
static const uint8_t x86_errata_set4[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
	DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
};
114 
/* Affected revisions for errata 81 and 69 (OINK-terminated). */
static const uint8_t x86_errata_set5[] = {
	SH_B3, OINK
};
118 
/* Affected revisions for erratum 97 (OINK-terminated). */
static const uint8_t x86_errata_set6[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, OINK
};
122 
/* Affected revisions for erratum 104 (OINK-terminated). */
static const uint8_t x86_errata_set7[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
};
126 
127 static const uint8_t x86_errata_set8[] = {
128 	BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
129 	DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
130 	JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
131 	SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
132 };
133 
/* Affected revisions for errata 254, 298 and 309 (OINK-terminated). */
static const uint8_t x86_errata_set9[] = {
	DR_BA, DR_B2, OINK
};
137 
/* Affected revisions for erratum 261 (OINK-terminated). */
static const uint8_t x86_errata_set10[] = {
	DR_BA, DR_B2, DR_B3, OINK
};
141 
/* Affected revisions for erratum 721 (OINK-terminated). */
static const uint8_t x86_errata_set11[] = {
	DR_BA, DR_B2, DR_B3, RB_C2, RB_C3, BL_C2, BL_C3, DA_C2,
	DA_C3, HY_D0, HY_D1, HY_D1_G34R1,  PH_E0, LN_B0, OINK
};
146 
/* Affected revisions for erratum 776 (OINK-terminated). */
static const uint8_t x86_errata_set12[] = {
	KB_A1, OINK
};
150 
/*
 * Affected revisions for errata 1021, 1049 and 1095, and for the
 * currently disabled (#if 0) erratum 1091 entry (OINK-terminated).
 */
static const uint8_t x86_errata_set13[] = {
	ZP_B1, ZP_B2, PiR_B2, OINK
};
154 
/* Affected revisions for erratum 1033 (OINK-terminated). */
static const uint8_t x86_errata_set14[] = {
	ZP_B1, OINK
};
158 
/* Affected revisions for erratum 793 (OINK-terminated). */
static const uint8_t x86_errata_set15[] = {
	KB_A1, ML_A1, OINK
};
162 
163 static bool x86_errata_setmsr(struct cpu_info *, errata_t *);
164 static bool x86_errata_testmsr(struct cpu_info *, errata_t *);
165 
166 static errata_t errata[] = {
167 	/*
168 	 * 81: Cache Coherency Problem with Hardware Prefetching
169 	 * and Streaming Stores
170 	 */
171 	{
172 		81, FALSE, MSR_DC_CFG, x86_errata_set5,
173 		x86_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
174 	},
175 	/*
176 	 * 86: DRAM Data Masking Feature Can Cause ECC Failures
177 	 */
178 	{
179 		86, FALSE, MSR_NB_CFG, x86_errata_set1,
180 		x86_errata_testmsr, NB_CFG_DISDATMSK
181 	},
182 	/*
183 	 * 89: Potential Deadlock With Locked Transactions
184 	 */
185 	{
186 		89, FALSE, MSR_NB_CFG, x86_errata_set8,
187 		x86_errata_testmsr, NB_CFG_DISIOREQLOCK
188 	},
189 	/*
190 	 * 94: Sequential Prefetch Feature May Cause Incorrect
191 	 * Processor Operation
192 	 */
193 	{
194 		94, FALSE, MSR_IC_CFG, x86_errata_set1,
195 		x86_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
196 	},
197 	/*
198 	 * 97: 128-Bit Streaming Stores May Cause Coherency
199 	 * Failure
200 	 *
201 	 * XXX "This workaround must not be applied to processors
202 	 * prior to revision C0."  We don't apply it, but if it
203 	 * can't be applied, it shouldn't be reported.
204 	 */
205 	{
206 		97, FALSE, MSR_DC_CFG, x86_errata_set6,
207 		x86_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
208 	},
209 	/*
210 	 * 104: DRAM Data Masking Feature Causes ChipKill ECC
211 	 * Failures When Enabled With x8/x16 DRAM Devices
212 	 */
213 	{
214 		104, FALSE, MSR_NB_CFG, x86_errata_set7,
215 		x86_errata_testmsr, NB_CFG_DISDATMSK
216 	},
217 	/*
218 	 * 113: Enhanced Write-Combining Feature Causes System Hang
219 	 */
220 	{
221 		113, FALSE, MSR_BU_CFG, x86_errata_set3,
222 		x86_errata_setmsr, BU_CFG_WBENHWSBDIS
223 	},
224 	/*
225 	 * 69: Multiprocessor Coherency Problem with Hardware
226 	 * Prefetch Mechanism
227 	 */
228 	{
229 		69, FALSE, MSR_BU_CFG, x86_errata_set5,
230 		x86_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
231 	},
232 	/*
233 	 * 101: DRAM Scrubber May Cause Data Corruption When Using
234 	 * Node-Interleaved Memory
235 	 */
236 	{
237 		101, FALSE, 0, x86_errata_set2,
238 		NULL, 0
239 	},
240 	/*
241 	 * 106: Potential Deadlock with Tightly Coupled Semaphores
242 	 * in an MP System
243 	 */
244 	{
245 		106, FALSE, MSR_LS_CFG, x86_errata_set2,
246 		x86_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
247 	},
248 	/*
249 	 * 107: Possible Multiprocessor Coherency Problem with
250 	 * Setting Page Table A/D Bits
251 	 */
252 	{
253 		107, FALSE, MSR_BU_CFG, x86_errata_set2,
254 		x86_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
255 	},
256 	/*
257 	 * 122: TLB Flush Filter May Cause Coherency Problem in
258 	 * Multiprocessor Systems
259 	 */
260 	{
261 		122, FALSE, MSR_HWCR, x86_errata_set4,
262 		x86_errata_setmsr, HWCR_FFDIS
263 	},
264 	/*
265 	 * 254: Internal Resource Livelock Involving Cached TLB Reload
266 	 */
267 	{
268 		254, FALSE, MSR_BU_CFG, x86_errata_set9,
269 		x86_errata_testmsr, BU_CFG_ERRATA_254
270 	},
271 	/*
272 	 * 261: Processor May Stall Entering Stop-Grant Due to Pending Data
273 	 * Cache Scrub
274 	 */
275 	{
276 		261, FALSE, MSR_DC_CFG, x86_errata_set10,
277 		x86_errata_testmsr, DC_CFG_ERRATA_261
278 	},
279 	/*
280 	 * 298: L2 Eviction May Occur During Processor Operation To Set
281 	 * Accessed or Dirty Bit
282 	 */
283 	{
284 		298, FALSE, MSR_HWCR, x86_errata_set9,
285 		x86_errata_testmsr, HWCR_TLBCACHEDIS
286 	},
287 	{
288 		298, FALSE, MSR_BU_CFG, x86_errata_set9,
289 		x86_errata_testmsr, BU_CFG_ERRATA_298
290 	},
291 	/*
292 	 * 309: Processor Core May Execute Incorrect Instructions on
293 	 * Concurrent L2 and Northbridge Response
294 	 */
295 	{
296 		309, FALSE, MSR_BU_CFG, x86_errata_set9,
297 		x86_errata_testmsr, BU_CFG_ERRATA_309
298 	},
299 	/*
300 	 * 721: Processor May Incorrectly Update Stack Pointer
301 	 */
302 	{
303 		721, FALSE, MSR_DE_CFG, x86_errata_set11,
304 		x86_errata_setmsr, DE_CFG_ERRATA_721
305 	},
306 	/*
307 	 * 776: Incorrect Processor Branch Prediction for Two Consecutive
308 	 * Linear Pages
309 	 */
310 	{
311 		776, FALSE, MSR_IC_CFG, x86_errata_set12,
312 		x86_errata_setmsr, IC_CFG_ERRATA_776
313 	},
314 	/*
315 	 * 793: Specific Combination of Writes to Write Combined Memory
316 	 * Types and Locked Instructions May Cause Core Hang
317 	 */
318 	{
319 		793, FALSE, MSR_LS_CFG, x86_errata_set15,
320 		x86_errata_setmsr, LS_CFG_ERRATA_793
321 	},
322 	/*
323 	 * 1021: Load Operation May Receive Stale Data From Older Store
324 	 * Operation
325 	 */
326 	{
327 		1021, FALSE, MSR_DE_CFG, x86_errata_set13,
328 		x86_errata_setmsr, DE_CFG_ERRATA_1021
329 	},
330 	/*
331 	 * 1033: A Lock Operation May Cause the System to Hang
332 	 */
333 	{
334 		1033, FALSE, MSR_LS_CFG, x86_errata_set14,
335 		x86_errata_setmsr, LS_CFG_ERRATA_1033
336 	},
337 	/*
338 	 * 1049: FCMOV Instruction May Not Execute Correctly
339 	 */
340 	{
341 		1049, FALSE, MSR_FP_CFG, x86_errata_set13,
342 		x86_errata_setmsr, FP_CFG_ERRATA_1049
343 	},
344 #if 0	/* Should we apply this errata? The other OSes don't. */
345 	/*
346 	 * 1091: Address Boundary Crossing Load Operation May Receive
347 	 * Stale Data
348 	 */
349 	{
350 		1091, FALSE, MSR_LS_CFG2, x86_errata_set13,
351 		x86_errata_setmsr, LS_CFG2_ERRATA_1091
352 	},
353 #endif
354 	/*
355 	 * 1095: Potential Violation of Read Ordering In Lock Operation
356 	 * In SMT (Simultaneous Multithreading) Mode
357 	 */
358 	{
359 		1095, FALSE, MSR_LS_CFG, x86_errata_set13,
360 		x86_errata_setmsr, LS_CFG_ERRATA_1095
361 	},
362 };
363 
364 static bool
365 x86_errata_testmsr(struct cpu_info *ci, errata_t *e)
366 {
367 	uint64_t val;
368 
369 	(void)ci;
370 
371 	val = rdmsr_locked(e->e_data1);
372 	if ((val & e->e_data2) != 0)
373 		return FALSE;
374 
375 	e->e_reported = TRUE;
376 	return TRUE;
377 }
378 
379 static bool
380 x86_errata_setmsr(struct cpu_info *ci, errata_t *e)
381 {
382 	uint64_t val;
383 
384 	(void)ci;
385 
386 	val = rdmsr_locked(e->e_data1);
387 	if ((val & e->e_data2) != 0)
388 		return FALSE;
389 	wrmsr_locked(e->e_data1, val | e->e_data2);
390 	aprint_debug_dev(ci->ci_dev, "erratum %d patched\n",
391 	    e->e_num);
392 
393 	return FALSE;
394 }
395 
396 void
397 x86_errata(void)
398 {
399 	struct cpu_info *ci;
400 	uint32_t descs[4];
401 	errata_t *e, *ex;
402 	cpurev_t rev;
403 	int i, j, upgrade;
404 	static int again;
405 
406 	/* don't run if we are under a hypervisor */
407 	if (cpu_feature[1] & CPUID2_RAZ)
408 		return;
409 
410 	/* only for AMD */
411 	if (cpu_vendor != CPUVENDOR_AMD)
412 		return;
413 
414 	ci = curcpu();
415 
416 	x86_cpuid(0x80000001, descs);
417 
418 	for (i = 0;; i += 2) {
419 		if ((rev = cpurevs[i]) == OINK)
420 			return;
421 		if (cpurevs[i + 1] == descs[0])
422 			break;
423 	}
424 
425 	ex = errata + __arraycount(errata);
426 	for (upgrade = 0, e = errata; e < ex; e++) {
427 		if (e->e_reported)
428 			continue;
429 		if (e->e_set != NULL) {
430 			for (j = 0; e->e_set[j] != OINK; j++)
431 				if (e->e_set[j] == rev)
432 					break;
433 			if (e->e_set[j] == OINK)
434 				continue;
435 		}
436 
437 		aprint_debug_dev(ci->ci_dev, "testing for erratum %d\n",
438 		    e->e_num);
439 
440 		if (e->e_act == NULL)
441 			e->e_reported = TRUE;
442 		else if ((*e->e_act)(ci, e) == FALSE)
443 			continue;
444 
445 		aprint_verbose_dev(ci->ci_dev, "erratum %d present\n",
446 		    e->e_num);
447 		upgrade = 1;
448 	}
449 
450 	if (upgrade && !again) {
451 		again = 1;
452 		aprint_normal_dev(ci->ci_dev, "WARNING: errata present,"
453 		    " BIOS upgrade may be\n");
454 		aprint_normal_dev(ci->ci_dev, "WARNING: necessary to ensure"
455 		    " reliable operation\n");
456 	}
457 }
458