xref: /netbsd-src/sys/arch/x86/x86/errata.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: errata.c,v 1.18 2008/05/25 15:52:07 chris Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Detect, report on, and work around known errata with x86 CPUs.
34  *
35  * This currently only handles AMD CPUs, and is generalised because
36  * there are quite a few problems that the BIOS can patch via MSR,
37  * but it is not known if the OS can patch these yet.  The list is
38  * expected to grow over time.
39  *
40  * The data here are from: Revision Guide for AMD Athlon 64 and
41  * AMD Opteron Processors, Publication #25759, Revision: 3.69,
42  * Issue Date: September 2006
43  *
44  * XXX This should perhaps be integrated with the identcpu code.
45  */
46 
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: errata.c,v 1.18 2008/05/25 15:52:07 chris Exp $");
49 
50 #include <sys/types.h>
51 #include <sys/systm.h>
52 
53 #include <machine/cpu.h>
54 #include <machine/cpufunc.h>
55 #include <machine/specialreg.h>
56 
57 #include <x86/cpuvar.h>
58 #include <x86/cputypes.h>
59 
60 typedef struct errata {
61 	u_short		e_num;
62 	u_short		e_reported;
63 	u_int		e_data1;
64 	const uint8_t	*e_set;
65 	bool		(*e_act)(struct cpu_info *, struct errata *);
66 	uint64_t	e_data2;
67 } errata_t;
68 
/*
 * Symbolic CPU revision identifiers.
 *
 * OINK is not a revision: it is the sentinel that terminates cpurevs[]
 * and every x86_errata_setN[] list.  All values must fit in a uint8_t
 * because the revision sets store them as bytes.
 */
enum cpurev {
	BH_E4 = 0,
	CH_CG,
	CH_D0,
	DH_CG,
	DH_D0,
	DH_E3,
	DH_E6,
	JH_E1,
	JH_E6,
	SH_B0,
	SH_B3,
	SH_C0,
	SH_CG,
	SH_D0,
	SH_E4,
	SH_E5,
	DR_BA,
	DR_B2,
	DR_B3,
	OINK
};

typedef enum cpurev cpurev_t;
75 
76 static const u_int cpurevs[] = {
77 	BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
78 	CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
79 	DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
80 	DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
81 	DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
82 	JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
83 	SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
84 	SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
85 	SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
86 	SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
87 	DR_BA, 0x0100f2a, DR_B2, 0x0100f22, DR_B3, 0x0100f23,
88 	OINK
89 };
90 
91 static const uint8_t x86_errata_set1[] = {
92 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
93 };
94 
95 static const uint8_t x86_errata_set2[] = {
96 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
97 };
98 
99 static const uint8_t x86_errata_set3[] = {
100 	JH_E1, DH_E3, OINK
101 };
102 
103 static const uint8_t x86_errata_set4[] = {
104 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
105 	DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
106 };
107 
108 static const uint8_t x86_errata_set5[] = {
109 	SH_B3, OINK
110 };
111 
112 static const uint8_t x86_errata_set6[] = {
113 	SH_C0, SH_CG, DH_CG, CH_CG, OINK
114 };
115 
116 static const uint8_t x86_errata_set7[] = {
117 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
118 };
119 
120 static const uint8_t x86_errata_set8[] = {
121 	BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
122 	DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
123 	JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
124 	SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
125 };
126 
127 static const uint8_t x86_errata_set9[] = {
128 	DR_BA, DR_B2, OINK
129 };
130 
131 static const uint8_t x86_errata_set10[] = {
132 	DR_BA, DR_B2, DR_B3, OINK
133 };
134 
135 static bool x86_errata_setmsr(struct cpu_info *, errata_t *);
136 static bool x86_errata_testmsr(struct cpu_info *, errata_t *);
137 
138 static errata_t errata[] = {
139 	/*
140 	 * 81: Cache Coherency Problem with Hardware Prefetching
141 	 * and Streaming Stores
142 	 */
143 	{
144 		81, FALSE, MSR_DC_CFG, x86_errata_set5,
145 		x86_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
146 	},
147 	/*
148 	 * 86: DRAM Data Masking Feature Can Cause ECC Failures
149 	 */
150 	{
151 		86, FALSE, MSR_NB_CFG, x86_errata_set1,
152 		x86_errata_testmsr, NB_CFG_DISDATMSK
153 	},
154 	/*
155 	 * 89: Potential Deadlock With Locked Transactions
156 	 */
157 	{
158 		89, FALSE, MSR_NB_CFG, x86_errata_set8,
159 		x86_errata_testmsr, NB_CFG_DISIOREQLOCK
160 	},
161 	/*
162 	 * 94: Sequential Prefetch Feature May Cause Incorrect
163 	 * Processor Operation
164 	 */
165 	{
166 		94, FALSE, MSR_IC_CFG, x86_errata_set1,
167 		x86_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
168 	},
169 	/*
170 	 * 97: 128-Bit Streaming Stores May Cause Coherency
171 	 * Failure
172 	 *
173 	 * XXX "This workaround must not be applied to processors
174 	 * prior to revision C0."  We don't apply it, but if it
175 	 * can't be applied, it shouldn't be reported.
176 	 */
177 	{
178 		97, FALSE, MSR_DC_CFG, x86_errata_set6,
179 		x86_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
180 	},
181 	/*
182 	 * 104: DRAM Data Masking Feature Causes ChipKill ECC
183 	 * Failures When Enabled With x8/x16 DRAM Devices
184 	 */
185 	{
186 		104, FALSE, MSR_NB_CFG, x86_errata_set7,
187 		x86_errata_testmsr, NB_CFG_DISDATMSK
188 	},
189 	/*
190 	 * 113: Enhanced Write-Combining Feature Causes System Hang
191 	 */
192 	{
193 		113, FALSE, MSR_BU_CFG, x86_errata_set3,
194 		x86_errata_setmsr, BU_CFG_WBENHWSBDIS
195 	},
196 	/*
197 	 * 69: Multiprocessor Coherency Problem with Hardware
198 	 * Prefetch Mechanism
199 	 */
200 	{
201 		69, FALSE, MSR_BU_CFG, x86_errata_set5,
202 		x86_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
203 	},
204 	/*
205 	 * 101: DRAM Scrubber May Cause Data Corruption When Using
206 	 * Node-Interleaved Memory
207 	 */
208 	{
209 		101, FALSE, 0, x86_errata_set2,
210 		NULL, 0
211 	},
212 	/*
213 	 * 106: Potential Deadlock with Tightly Coupled Semaphores
214 	 * in an MP System
215 	 */
216 	{
217 		106, FALSE, MSR_LS_CFG, x86_errata_set2,
218 		x86_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
219 	},
220 	/*
221 	 * 107: Possible Multiprocessor Coherency Problem with
222 	 * Setting Page Table A/D Bits
223 	 */
224 	{
225 		107, FALSE, MSR_BU_CFG, x86_errata_set2,
226 		x86_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
227 	},
228 	/*
229 	 * 122: TLB Flush Filter May Cause Coherency Problem in
230 	 * Multiprocessor Systems
231 	 */
232 	{
233 		122, FALSE, MSR_HWCR, x86_errata_set4,
234 		x86_errata_setmsr, HWCR_FFDIS
235 	},
236 	/*
237 	 * 254: Internal Resource Livelock Involving Cached TLB Reload
238 	 */
239 	{
240 		254, FALSE, MSR_BU_CFG, x86_errata_set9,
241 		x86_errata_testmsr, BU_CFG_ERRATA_254
242 	},
243 	/*
244 	 * 261: Processor May Stall Entering Stop-Grant Due to Pending Data
245 	 * Cache Scrub
246 	 */
247 	{
248 		261, FALSE, MSR_DC_CFG, x86_errata_set10,
249 		x86_errata_testmsr, DC_CFG_ERRATA_261
250 	},
251 	/*
252 	 * 298: L2 Eviction May Occur During Processor Operation To Set
253 	 * Accessed or Dirty Bit
254 	 */
255 	{
256 		298, FALSE, MSR_HWCR, x86_errata_set9,
257 		x86_errata_testmsr, HWCR_TLBCACHEDIS
258 	},
259 	{
260 		298, FALSE, MSR_BU_CFG, x86_errata_set9,
261 		x86_errata_testmsr, BU_CFG_ERRATA_298
262 	},
263 	/*
264 	 * 309: Processor Core May Execute Incorrect Instructions on
265 	 * Concurrent L2 and Northbridge Response
266 	 */
267 	{
268 		309, FALSE, MSR_BU_CFG, x86_errata_set9,
269 		x86_errata_testmsr, BU_CFG_ERRATA_309
270 	},
271 };
272 
273 static bool
274 x86_errata_testmsr(struct cpu_info *ci, errata_t *e)
275 {
276 	uint64_t val;
277 
278 	(void)ci;
279 
280 	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
281 	if ((val & e->e_data2) != 0)
282 		return FALSE;
283 
284 	e->e_reported = TRUE;
285 	return TRUE;
286 }
287 
288 static bool
289 x86_errata_setmsr(struct cpu_info *ci, errata_t *e)
290 {
291 	uint64_t val;
292 
293 	(void)ci;
294 
295 	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
296 	if ((val & e->e_data2) != 0)
297 		return FALSE;
298 	wrmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE, val | e->e_data2);
299 	aprint_debug_dev(ci->ci_dev, "erratum %d patched\n",
300 	    e->e_num);
301 
302 	return FALSE;
303 }
304 
305 void
306 x86_errata(void)
307 {
308 	struct cpu_info *ci;
309 	uint32_t descs[4];
310 	errata_t *e, *ex;
311 	cpurev_t rev;
312 	int i, j, upgrade;
313 	static int again;
314 
315 	if (cpu_vendor != CPUVENDOR_AMD)
316 		return;
317 
318 	ci = curcpu();
319 
320 	x86_cpuid(0x80000001, descs);
321 
322 	for (i = 0;; i += 2) {
323 		if ((rev = cpurevs[i]) == OINK)
324 			return;
325 		if (cpurevs[i + 1] == descs[0])
326 			break;
327 	}
328 
329 	ex = errata + sizeof(errata) / sizeof(errata[0]);
330 	for (upgrade = 0, e = errata; e < ex; e++) {
331 		if (e->e_reported)
332 			continue;
333 		if (e->e_set != NULL) {
334 			for (j = 0; e->e_set[j] != OINK; j++)
335 				if (e->e_set[j] == rev)
336 					break;
337 			if (e->e_set[j] == OINK)
338 				continue;
339 		}
340 
341 		aprint_debug_dev(ci->ci_dev, "testing for erratum %d\n",
342 		    e->e_num);
343 
344 		if (e->e_act == NULL)
345 			e->e_reported = TRUE;
346 		else if ((*e->e_act)(ci, e) == FALSE)
347 			continue;
348 
349 		aprint_verbose_dev(ci->ci_dev, "erratum %d present\n",
350 		    e->e_num);
351 		upgrade = 1;
352 	}
353 
354 	if (upgrade && !again) {
355 		again = 1;
356 		aprint_normal_dev(ci->ci_dev, "WARNING: errata present, BIOS upgrade "
357 		    "may be\n");
358 		aprint_normal_dev(ci->ci_dev, "WARNING: necessary to ensure reliable "
359 		    "operation\n");
360 	}
361 }
362