xref: /netbsd-src/sys/arch/x86/x86/errata.c (revision cb3c2134d1f7209ee1522179e01dcc5cba861e8f)
1 /*	$NetBSD: errata.c,v 1.35 2023/10/27 05:45:00 mrg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Detect, report on, and work around known errata with x86 CPUs.
34  *
35  * This currently only handles AMD CPUs, and is generalised because
36  * there are quite a few problems that the BIOS can patch via MSR,
37  * but it is not known if the OS can patch these yet.  The list is
38  * expected to grow over time.
39  *
40  * The data here are from: Revision Guide for AMD Athlon 64 and
41  * AMD Opteron Processors, Publication #25759, Revision: 3.69,
42  * Issue Date: September 2006
43  *
44  * https://www.amd.com/system/files/TechDocs/25759.pdf
45  *
46  * XXX This should perhaps be integrated with the identcpu code.
47  */
48 
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: errata.c,v 1.35 2023/10/27 05:45:00 mrg Exp $");
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/xcall.h>
55 #include <sys/kthread.h>
56 #include <sys/clock.h>
57 
58 #include <machine/cpu.h>
59 #include <machine/cpufunc.h>
60 #include <machine/specialreg.h>
61 
62 #include <x86/cpuvar.h>
63 #include <x86/cputypes.h>
64 
65 typedef struct errata {
66 	u_short		e_num;
67 	u_short		e_reported;
68 	u_int		e_data1;
69 	const uint8_t	*e_set;
70 	bool		(*e_act)(struct cpu_info *, struct errata *);
71 	uint64_t	e_data2;
72 	const char	*e_name;	/* use if e_num == 0 */
73 } errata_t;
74 
/*
 * Processor revisions known to this file.  The names match names
 * from various AMD Errata/Revision Guides.
 *
 * The values are stored in the uint8_t x86_errata_set* arrays; OINK
 * is the terminator for those arrays and for cpurevs[].
 */
typedef enum cpurev {
	/* K8 / Family 0Fh */
	BH_E4,
	CH_CG, CH_D0,
	DH_CG, DH_D0, DH_E3, DH_E6,
	JH_E1, JH_E6,
	SH_B0, SH_B3, SH_C0, SH_CG, SH_D0, SH_E4, SH_E5,

	/* K10 / Family 10h */
	DR_BA, DR_B2, DR_B3,
	RB_C2, RB_C3,
	BL_C2, BL_C3,
	DA_C2, DA_C3,
	HY_D0, HY_D1, HY_D1_G34R1,
	PH_E0,

	/* Llano / Family 12h */
	LN_B0,

	/* Jaguar / Family 16h */
	KB_A1, ML_A1,

	/* Zen/Zen+/Zen2 / Family 17h */
	ZP_B1, ZP_B2, PiR_B2, Rome_B0,

	/* XXX client Zen2 names aren't known yet. */
	Z2_XB, Z2_Ren, Z2_Luc, Z2_Mat, Z2_VG, Z2_Men,

	/* Zen3/Zen4 / Family 19h */
	Milan_B1, Milan_B2, Genoa_B1,

	/* List terminator, not a real revision. */
	OINK
} cpurev_t;
101 
/*
 * CPUREV(family, model, stepping) builds the value that is compared
 * against the first word of the CPUID 0x80000001 result.  From the
 * most significant bits down, that word is laid out as follows (the
 * number after each underscore is the field width in bits):
 *
 *    resv1_4 extfam_8 extmodel_4 resv2_4 fam_4 model_4 stepping_4
 *
 * Families up to 0xF are stored directly in fam_4.  Larger families
 * store 0xF in fam_4 and the excess over 0xF in extfam_8 (ie, family
 * 0x10 is 0xF in fam_4 and 0x01 in extfam_8.)  The low nybble of the
 * model always goes in model_4; for family 6 and for families 0xF
 * and up, the high nybble goes in extmodel_4.
 */
#define CPUREV(fam,mod,step)					\
	((((fam) > 0xf)						\
	    ? ((0xf << 8) | (((fam) - 0xf) << 20))		\
	    : ((fam) << 8))					\
	 | (((mod) & 0xf) << 4)					\
	 | ((((fam) == 6) || ((fam) >= 0xf))			\
	    ? (((mod) & 0xf0) << 12)				\
	    : 0)						\
	 | ((step) & 0xf))
122 static const u_int cpurevs[] = {
123 	BH_E4,		CPUREV(0x0F, 0x2B, 0x1),
124 	CH_CG,		CPUREV(0x0F, 0x08, 0x2),
125 	CH_CG,		CPUREV(0x0F, 0x0B, 0x2),
126 	CH_D0,		CPUREV(0x0F, 0x18, 0x0),
127 	CH_D0,		CPUREV(0x0F, 0x1B, 0x0),
128 	DH_CG,		CPUREV(0x0F, 0x0C, 0x0),
129 	DH_CG,		CPUREV(0x0F, 0x0E, 0x0),
130 	DH_CG,		CPUREV(0x0F, 0x0F, 0x0),
131 	DH_D0,		CPUREV(0x0F, 0x1C, 0x0),
132 	DH_D0,		CPUREV(0x0F, 0x1F, 0x0),
133 	DH_E3,		CPUREV(0x0F, 0x2C, 0x0),
134 	DH_E3,		CPUREV(0x0F, 0x2F, 0x0),
135 	DH_E6,		CPUREV(0x0F, 0x2C, 0x2),
136 	DH_E6,		CPUREV(0x0F, 0x2F, 0x2),
137 	JH_E1,		CPUREV(0x0F, 0x21, 0x0),
138 	JH_E6,		CPUREV(0x0F, 0x21, 0x2),
139 	JH_E6,		CPUREV(0x0F, 0x23, 0x2),
140 	SH_B0,		CPUREV(0x0F, 0x04, 0x0),
141 	SH_B3,		CPUREV(0x0F, 0x05, 0x1),
142 	SH_C0,		CPUREV(0x0F, 0x04, 0x8),
143 	SH_C0,		CPUREV(0x0F, 0x05, 0x8),
144 	SH_CG,		CPUREV(0x0F, 0x04, 0xA),
145 	SH_CG,		CPUREV(0x0F, 0x05, 0xA),
146 	SH_CG,		CPUREV(0x0F, 0x07, 0xA),
147 	SH_D0,		CPUREV(0x0F, 0x14, 0x0),
148 	SH_D0,		CPUREV(0x0F, 0x15, 0x0),
149 	SH_D0,		CPUREV(0x0F, 0x17, 0x0),
150 	SH_E4,		CPUREV(0x0F, 0x25, 0x1),
151 	SH_E4,		CPUREV(0x0F, 0x27, 0x1),
152 	SH_E5,		CPUREV(0x0F, 0x24, 0x2),
153 
154 	DR_BA,		CPUREV(0x10, 0x02, 0xA),
155 	DR_B2,		CPUREV(0x10, 0x02, 0x2),
156 	DR_B3,		CPUREV(0x10, 0x02, 0x3),
157 	RB_C2,		CPUREV(0x10, 0x04, 0x2),
158 	RB_C3,		CPUREV(0x10, 0x04, 0x3),
159 	BL_C2,		CPUREV(0x10, 0x05, 0x2),
160 	BL_C3,		CPUREV(0x10, 0x05, 0x3),
161 	DA_C2,		CPUREV(0x10, 0x06, 0x2),
162 	DA_C3,		CPUREV(0x10, 0x06, 0x3),
163 	HY_D0,		CPUREV(0x10, 0x08, 0x0),
164 	HY_D1,		CPUREV(0x10, 0x08, 0x1),
165 	HY_D1_G34R1,	CPUREV(0x10, 0x09, 0x1),
166 	PH_E0,		CPUREV(0x10, 0x0A, 0x0),
167 
168 	LN_B0,		CPUREV(0x12, 0x01, 0x0),
169 
170 	KB_A1,		CPUREV(0x16, 0x00, 0x1),
171 	ML_A1,		CPUREV(0x16, 0x30, 0x1),
172 
173 	ZP_B1,		CPUREV(0x17, 0x01, 0x1),
174 	ZP_B2,		CPUREV(0x17, 0x01, 0x2),
175 	PiR_B2,		CPUREV(0x17, 0x08, 0x2),
176 	Rome_B0,	CPUREV(0x17, 0x31, 0x0),
177 	Z2_XB,		CPUREV(0x17, 0x47, 0x0),
178 	Z2_Ren,		CPUREV(0x17, 0x60, 0x1),
179 	Z2_Luc,		CPUREV(0x17, 0x68, 0x1),
180 	Z2_Mat,		CPUREV(0x17, 0x71, 0x0),
181 	Z2_VG,		CPUREV(0x17, 0x90, 0x2),
182 	Z2_Men,		CPUREV(0x17, 0xA0, 0x0),
183 
184 	Milan_B1,	CPUREV(0x19, 0x01, 0x1),
185 	Milan_B2,	CPUREV(0x19, 0x01, 0x2),
186 	Genoa_B1,	CPUREV(0x19, 0x11, 0x1),
187 	OINK
188 };
189 
190 static const uint8_t x86_errata_set1[] = {
191 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
192 };
193 
194 static const uint8_t x86_errata_set2[] = {
195 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
196 };
197 
198 static const uint8_t x86_errata_set3[] = {
199 	JH_E1, DH_E3, OINK
200 };
201 
202 static const uint8_t x86_errata_set4[] = {
203 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
204 	DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
205 };
206 
207 static const uint8_t x86_errata_set5[] = {
208 	SH_B3, OINK
209 };
210 
211 static const uint8_t x86_errata_set6[] = {
212 	SH_C0, SH_CG, DH_CG, CH_CG, OINK
213 };
214 
215 static const uint8_t x86_errata_set7[] = {
216 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
217 };
218 
219 static const uint8_t x86_errata_set8[] = {
220 	BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
221 	DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
222 	JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
223 	SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
224 };
225 
226 static const uint8_t x86_errata_set9[] = {
227 	DR_BA, DR_B2, OINK
228 };
229 
230 static const uint8_t x86_errata_set10[] = {
231 	DR_BA, DR_B2, DR_B3, OINK
232 };
233 
234 static const uint8_t x86_errata_set11[] = {
235 	DR_BA, DR_B2, DR_B3, RB_C2, RB_C3, BL_C2, BL_C3, DA_C2,
236 	DA_C3, HY_D0, HY_D1, HY_D1_G34R1,  PH_E0, LN_B0, OINK
237 };
238 
239 static const uint8_t x86_errata_set12[] = {
240 	KB_A1, OINK
241 };
242 
243 static const uint8_t x86_errata_set13[] = {
244 	ZP_B1, ZP_B2, PiR_B2, OINK
245 };
246 
247 static const uint8_t x86_errata_set14[] = {
248 	ZP_B1, OINK
249 };
250 
251 static const uint8_t x86_errata_set15[] = {
252 	KB_A1, ML_A1, OINK
253 };
254 
255 static const uint8_t x86_errata_zen2[] = {
256 	Rome_B0, Z2_XB, Z2_Ren, Z2_Luc, Z2_Mat, Z2_VG, Z2_Men, OINK
257 };
258 
259 static bool x86_errata_setmsr(struct cpu_info *, errata_t *);
260 static bool x86_errata_testmsr(struct cpu_info *, errata_t *);
261 static bool x86_errata_amd_1474(struct cpu_info *, errata_t *);
262 
263 static errata_t errata[] = {
264 	/*
265 	 * 81: Cache Coherency Problem with Hardware Prefetching
266 	 * and Streaming Stores
267 	 */
268 	{
269 		81, FALSE, MSR_DC_CFG, x86_errata_set5,
270 		x86_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF, NULL
271 	},
272 	/*
273 	 * 86: DRAM Data Masking Feature Can Cause ECC Failures
274 	 */
275 	{
276 		86, FALSE, MSR_NB_CFG, x86_errata_set1,
277 		x86_errata_testmsr, NB_CFG_DISDATMSK, NULL
278 	},
279 	/*
280 	 * 89: Potential Deadlock With Locked Transactions
281 	 */
282 	{
283 		89, FALSE, MSR_NB_CFG, x86_errata_set8,
284 		x86_errata_testmsr, NB_CFG_DISIOREQLOCK, NULL
285 	},
286 	/*
287 	 * 94: Sequential Prefetch Feature May Cause Incorrect
288 	 * Processor Operation
289 	 */
290 	{
291 		94, FALSE, MSR_IC_CFG, x86_errata_set1,
292 		x86_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH, NULL
293 	},
294 	/*
295 	 * 97: 128-Bit Streaming Stores May Cause Coherency
296 	 * Failure
297 	 *
298 	 * XXX "This workaround must not be applied to processors
299 	 * prior to revision C0."  We don't apply it, but if it
300 	 * can't be applied, it shouldn't be reported.
301 	 */
302 	{
303 		97, FALSE, MSR_DC_CFG, x86_errata_set6,
304 		x86_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO, NULL
305 	},
306 	/*
307 	 * 104: DRAM Data Masking Feature Causes ChipKill ECC
308 	 * Failures When Enabled With x8/x16 DRAM Devices
309 	 */
310 	{
311 		104, FALSE, MSR_NB_CFG, x86_errata_set7,
312 		x86_errata_testmsr, NB_CFG_DISDATMSK, NULL
313 	},
314 	/*
315 	 * 113: Enhanced Write-Combining Feature Causes System Hang
316 	 */
317 	{
318 		113, FALSE, MSR_BU_CFG, x86_errata_set3,
319 		x86_errata_setmsr, BU_CFG_WBENHWSBDIS, NULL
320 	},
321 	/*
322 	 * 69: Multiprocessor Coherency Problem with Hardware
323 	 * Prefetch Mechanism
324 	 */
325 	{
326 		69, FALSE, MSR_BU_CFG, x86_errata_set5,
327 		x86_errata_setmsr, BU_CFG_WBPFSMCCHKDIS, NULL
328 	},
329 	/*
330 	 * 101: DRAM Scrubber May Cause Data Corruption When Using
331 	 * Node-Interleaved Memory
332 	 */
333 	{
334 		101, FALSE, 0, x86_errata_set2,
335 		NULL, 0, NULL
336 	},
337 	/*
338 	 * 106: Potential Deadlock with Tightly Coupled Semaphores
339 	 * in an MP System
340 	 */
341 	{
342 		106, FALSE, MSR_LS_CFG, x86_errata_set2,
343 		x86_errata_testmsr, LS_CFG_DIS_LS2_SQUISH, NULL
344 	},
345 	/*
346 	 * 107: Possible Multiprocessor Coherency Problem with
347 	 * Setting Page Table A/D Bits
348 	 */
349 	{
350 		107, FALSE, MSR_BU_CFG, x86_errata_set2,
351 		x86_errata_testmsr, BU_CFG_THRL2IDXCMPDIS, NULL
352 	},
353 	/*
354 	 * 122: TLB Flush Filter May Cause Coherency Problem in
355 	 * Multiprocessor Systems
356 	 */
357 	{
358 		122, FALSE, MSR_HWCR, x86_errata_set4,
359 		x86_errata_setmsr, HWCR_FFDIS, NULL
360 	},
361 	/*
362 	 * 254: Internal Resource Livelock Involving Cached TLB Reload
363 	 */
364 	{
365 		254, FALSE, MSR_BU_CFG, x86_errata_set9,
366 		x86_errata_testmsr, BU_CFG_ERRATA_254, NULL
367 	},
368 	/*
369 	 * 261: Processor May Stall Entering Stop-Grant Due to Pending Data
370 	 * Cache Scrub
371 	 */
372 	{
373 		261, FALSE, MSR_DC_CFG, x86_errata_set10,
374 		x86_errata_testmsr, DC_CFG_ERRATA_261, NULL
375 	},
376 	/*
377 	 * 298: L2 Eviction May Occur During Processor Operation To Set
378 	 * Accessed or Dirty Bit
379 	 */
380 	{
381 		298, FALSE, MSR_HWCR, x86_errata_set9,
382 		x86_errata_testmsr, HWCR_TLBCACHEDIS, NULL
383 	},
384 	{
385 		298, FALSE, MSR_BU_CFG, x86_errata_set9,
386 		x86_errata_testmsr, BU_CFG_ERRATA_298, NULL
387 	},
388 	/*
389 	 * 309: Processor Core May Execute Incorrect Instructions on
390 	 * Concurrent L2 and Northbridge Response
391 	 */
392 	{
393 		309, FALSE, MSR_BU_CFG, x86_errata_set9,
394 		x86_errata_testmsr, BU_CFG_ERRATA_309, NULL
395 	},
396 	/*
397 	 * 721: Processor May Incorrectly Update Stack Pointer
398 	 */
399 	{
400 		721, FALSE, MSR_DE_CFG, x86_errata_set11,
401 		x86_errata_setmsr, DE_CFG_ERRATA_721, NULL
402 	},
403 	/*
404 	 * 776: Incorrect Processor Branch Prediction for Two Consecutive
405 	 * Linear Pages
406 	 */
407 	{
408 		776, FALSE, MSR_IC_CFG, x86_errata_set12,
409 		x86_errata_setmsr, IC_CFG_ERRATA_776, NULL
410 	},
411 	/*
412 	 * 793: Specific Combination of Writes to Write Combined Memory
413 	 * Types and Locked Instructions May Cause Core Hang
414 	 */
415 	{
416 		793, FALSE, MSR_LS_CFG, x86_errata_set15,
417 		x86_errata_setmsr, LS_CFG_ERRATA_793, NULL
418 	},
419 	/*
420 	 * 1021: Load Operation May Receive Stale Data From Older Store
421 	 * Operation
422 	 */
423 	{
424 		1021, FALSE, MSR_DE_CFG, x86_errata_set13,
425 		x86_errata_setmsr, DE_CFG_ERRATA_1021, NULL
426 	},
427 	/*
428 	 * 1033: A Lock Operation May Cause the System to Hang
429 	 */
430 	{
431 		1033, FALSE, MSR_LS_CFG, x86_errata_set14,
432 		x86_errata_setmsr, LS_CFG_ERRATA_1033, NULL
433 	},
434 	/*
435 	 * 1049: FCMOV Instruction May Not Execute Correctly
436 	 */
437 	{
438 		1049, FALSE, MSR_FP_CFG, x86_errata_set13,
439 		x86_errata_setmsr, FP_CFG_ERRATA_1049, NULL
440 	},
441 #if 0	/* Should we apply this errata? The other OSes don't. */
442 	/*
443 	 * 1091: Address Boundary Crossing Load Operation May Receive
444 	 * Stale Data
445 	 */
446 	{
447 		1091, FALSE, MSR_LS_CFG2, x86_errata_set13,
448 		x86_errata_setmsr, LS_CFG2_ERRATA_1091, NULL
449 	},
450 #endif
451 	/*
452 	 * 1095: Potential Violation of Read Ordering In Lock Operation
453 	 * In SMT (Simultaneous Multithreading) Mode
454 	 */
455 	{
456 		1095, FALSE, MSR_LS_CFG, x86_errata_set13,
457 		x86_errata_setmsr, LS_CFG_ERRATA_1095, NULL
458 	},
459 	/*
460 	 * 1474: A CPU core may hang after about 1044 days
461 	 */
462 	{
463 		1474, FALSE, MSR_CC6_CFG, x86_errata_zen2,
464 		x86_errata_amd_1474, CC6_CFG_DISABLE_BITS, NULL
465 	},
466 	/*
467 	 * Zenbleed:
468 	 * https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7008.html
469 	 * https://github.com/google/security-research/security/advisories/GHSA-v6wh-rxpg-cmm8
470 	 * https://lock.cmpxchg8b.com/zenbleed.html
471 	 */
472 	{
473 		0, FALSE, MSR_DE_CFG, x86_errata_zen2,
474 		x86_errata_setmsr, DE_CFG_ERRATA_ZENBLEED,
475 		"ZenBleed"
476 	},
477 };
478 
/*
 * 1474: A CPU core may hang after about 1044 days
 *
 * The workaround is to disable the CC6 power level.  That can be a
 * performance issue, since it stops full turbo in some implementations
 * (eg, half the cores must be in CC6 to achieve the highest boost
 * level), so it is put off rather than applied straight away.
 *
 * The obvious approach is a timer set to fire in 1000 days.  However,
 * NetBSD timers end up having a signed 32-bit hz-based value, which
 * rolls over in under 25 days with HZ=1000, and doing xcall(9) or
 * kthread(9) from a callout is not allowed anyway.  So a kthread just
 * waits 1 day, 1000 times.
 */

/* Day count at which the upcoming workaround is announced. */
#define AMD_ERRATA_1474_WARN_DAYS	 950
/* Day count at which CC6 is disabled. */
#define AMD_ERRATA_1474_BAD_DAYS	1000
493 
494 static void
amd_errata_1474_disable_cc6(void * a1,void * a2)495 amd_errata_1474_disable_cc6(void *a1, void *a2)
496 {
497 	errata_t *e = a1;
498 	uint64_t val;
499 
500 	val = rdmsr_locked(e->e_data1);
501 	if ((val & e->e_data2) == 0)
502 		return;
503 	wrmsr_locked(e->e_data1, val & ~e->e_data2);
504 	aprint_debug_dev(curcpu()->ci_dev, "erratum %u patched\n",
505 	    e->e_num);
506 }
507 
508 static void
amd_errata_1474_thread(void * arg)509 amd_errata_1474_thread(void *arg)
510 {
511 	int loops = 0;
512 	int ticks;
513 
514 	ticks = hz * SECS_PER_DAY;
515 #ifdef X86_ERRATA_TEST_AMD_1474
516 	/*
517 	 * Make this trigger warning after 50 seconds, and workaround
518 	 * at 100 seconds, for easy testing.
519 	 */
520 	ticks = hz;
521 	loops = 900;
522 #endif
523 
524 	while (loops++ < AMD_ERRATA_1474_BAD_DAYS) {
525 		if (loops == AMD_ERRATA_1474_WARN_DAYS) {
526 			printf("warning: AMD Errata 1474 workaround scheduled "
527 			       "for %u days.\n", AMD_ERRATA_1474_BAD_DAYS -
528 						 AMD_ERRATA_1474_WARN_DAYS);
529 			printf("warning: reboot required to avoid.\n");
530 		}
531 		kpause("amd1474", false, ticks, NULL);
532 	}
533 
534 	/* Been 1000 days, disable CC6 and warn about it. */
535 	uint64_t xc = xc_broadcast(0, amd_errata_1474_disable_cc6, arg, NULL);
536 	xc_wait(xc);
537 
538 	printf("warning: AMD CC6 disabled due to errata 1474.\n");
539 	printf("warning: reboot required to restore full turbo speeds.\n");
540 
541 	kthread_exit(0);
542 }
543 
544 static bool
x86_errata_amd_1474(struct cpu_info * ci,errata_t * e)545 x86_errata_amd_1474(struct cpu_info *ci, errata_t *e)
546 {
547 	int error;
548 
549 	/* Don't do anything on non-primary CPUs. */
550 	if (!CPU_IS_PRIMARY(ci))
551 		return FALSE;
552 
553 	error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
554 	    amd_errata_1474_thread, e, NULL, "amd1474");
555 	if (error) {
556 		printf("WARNING: Unable to disable AMD errata 1474!\n");
557 		printf("WARNING: reboot system after %u days to avoid CPU "
558 		    "hangs.\n", AMD_ERRATA_1474_BAD_DAYS);
559 	} else {
560 		aprint_debug_dev(ci->ci_dev, "workaround for erratum %u "
561 		    "scheduled for %u days\n", e->e_num,
562 		    AMD_ERRATA_1474_BAD_DAYS);
563 	}
564 
565 	/* Do own warning here, it's not like most others. */
566 	return FALSE;
567 }
568 
569 static void
x86_errata_log(device_t dev,errata_t * e,const char * msg)570 x86_errata_log(device_t dev, errata_t *e, const char *msg)
571 {
572 
573 	if (e->e_num == 0)
574 		aprint_debug_dev(dev, "erratum '%s' %s\n", e->e_name, msg);
575 	else
576 		aprint_debug_dev(dev, "erratum %u %s\n", e->e_num, msg);
577 }
578 
579 static bool
x86_errata_testmsr(struct cpu_info * ci,errata_t * e)580 x86_errata_testmsr(struct cpu_info *ci, errata_t *e)
581 {
582 	uint64_t val;
583 
584 	(void)ci;
585 
586 	val = rdmsr_locked(e->e_data1);
587 	if ((val & e->e_data2) != 0)
588 		return FALSE;
589 
590 	e->e_reported = TRUE;
591 	return TRUE;
592 }
593 
594 static bool
x86_errata_setmsr(struct cpu_info * ci,errata_t * e)595 x86_errata_setmsr(struct cpu_info *ci, errata_t *e)
596 {
597 	uint64_t val;
598 
599 	(void)ci;
600 
601 	val = rdmsr_locked(e->e_data1);
602 	if ((val & e->e_data2) != 0)
603 		return FALSE;
604 	wrmsr_locked(e->e_data1, val | e->e_data2);
605 	x86_errata_log(ci->ci_dev, e, "patched");
606 
607 	return FALSE;
608 }
609 
610 void
x86_errata(void)611 x86_errata(void)
612 {
613 	struct cpu_info *ci;
614 	uint32_t descs[4];
615 	errata_t *e, *ex;
616 	cpurev_t rev;
617 	int i, j, upgrade;
618 	static int again;
619 
620 	/* don't run if we are under a hypervisor */
621 	if (cpu_feature[1] & CPUID2_RAZ)
622 		return;
623 
624 	/* only for AMD */
625 	if (cpu_vendor != CPUVENDOR_AMD)
626 		return;
627 
628 	ci = curcpu();
629 
630 	x86_cpuid(0x80000001, descs);
631 	if (CPU_IS_PRIMARY(ci)) {
632 		aprint_verbose_dev(ci->ci_dev,
633 		    "searching errata for cpu revision 0x%08"PRIx32"\n",
634 		    descs[0]);
635 	}
636 
637 	for (i = 0;; i += 2) {
638 		if ((rev = cpurevs[i]) == OINK)
639 			return;
640 		if (cpurevs[i + 1] == descs[0])
641 			break;
642 	}
643 
644 	ex = errata + __arraycount(errata);
645 	for (upgrade = 0, e = errata; e < ex; e++) {
646 		if (e->e_reported)
647 			continue;
648 		if (e->e_set != NULL) {
649 			for (j = 0; e->e_set[j] != OINK; j++)
650 				if (e->e_set[j] == rev)
651 					break;
652 			if (e->e_set[j] == OINK)
653 				continue;
654 		}
655 
656 		x86_errata_log(ci->ci_dev, e, "testing");
657 
658 		if (e->e_act == NULL)
659 			e->e_reported = TRUE;
660 		else if ((*e->e_act)(ci, e) == FALSE)
661 			continue;
662 
663 		x86_errata_log(ci->ci_dev, e, "present");
664 		upgrade = 1;
665 	}
666 
667 	if (upgrade && !again) {
668 		again = 1;
669 		aprint_normal_dev(ci->ci_dev, "WARNING: errata present,"
670 		    " BIOS upgrade may be\n");
671 		aprint_normal_dev(ci->ci_dev, "WARNING: necessary to ensure"
672 		    " reliable operation\n");
673 	}
674 }
675