xref: /netbsd-src/sys/arch/powerpc/ibm4xx/dev/ecc_plb.c (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1 /*	$NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $	*/
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.15 2014/02/25 14:09:13 martin Exp $");
40 
41 #include "locators.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/device.h>
46 #include <sys/cpu.h>
47 
48 #include <prop/proplib.h>
49 
50 #include <powerpc/ibm4xx/cpu.h>
51 #include <powerpc/ibm4xx/dcr4xx.h>
52 #include <powerpc/ibm4xx/dev/plbvar.h>
53 
54 
55 struct ecc_plb_softc {
56 	device_t sc_dev;
57 	uint64_t sc_ecc_tb;
58 	uint64_t sc_ecc_iv;	 /* Interval */
59 	uint32_t sc_ecc_cnt;
60 	u_int sc_memsize;
61 	int sc_irq;
62 };
63 
64 static int	ecc_plbmatch(device_t, cfdata_t, void *);
65 static void	ecc_plbattach(device_t, device_t, void *);
66 static void	ecc_plb_deferred(device_t);
67 static int	ecc_plb_intr(void *);
68 
69 CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc),
70     ecc_plbmatch, ecc_plbattach, NULL, NULL);
71 
72 static int ecc_plb_found;
73 
74 static int
75 ecc_plbmatch(device_t parent, cfdata_t cf, void *aux)
76 {
77 	struct plb_attach_args *paa = aux;
78 
79 	if (strcmp(paa->plb_name, cf->cf_name) != 0)
80 		return (0);
81 
82 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
83 		panic("ecc_plbmatch: wildcard IRQ not allowed");
84 
85 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
86 
87 	return (!ecc_plb_found);
88 }
89 
90 static void
91 ecc_plbattach(device_t parent, device_t self, void *aux)
92 {
93 	struct ecc_plb_softc *sc = device_private(self);
94 	struct plb_attach_args *paa = aux;
95 	unsigned int processor_freq;
96 	unsigned int memsiz;
97 	prop_number_t pn;
98 
99 	ecc_plb_found++;
100 
101 	pn = prop_dictionary_get(board_properties, "processor-frequency");
102 	KASSERT(pn != NULL);
103 	processor_freq = (unsigned int) prop_number_integer_value(pn);
104 
105 	pn = prop_dictionary_get(board_properties, "mem-size");
106 	KASSERT(pn != NULL);
107 	memsiz = (unsigned int) prop_number_integer_value(pn);
108 
109 	aprint_normal(": ECC controller\n");
110 
111 	sc->sc_dev = self;
112 	sc->sc_ecc_tb = 0;
113 	sc->sc_ecc_cnt = 0;
114 	sc->sc_ecc_iv = processor_freq; /* Set interval */
115 	sc->sc_memsize = memsiz;
116 	sc->sc_irq = paa->plb_irq;
117 
118 	/*
119 	 * Defer hooking the interrupt until all PLB devices have attached
120 	 * since the interrupt controller may well be one of those devices...
121 	 */
122 	config_defer(self, ecc_plb_deferred);
123 }
124 
125 static void
126 ecc_plb_deferred(device_t self)
127 {
128 	struct ecc_plb_softc *sc = device_private(self);
129 
130 	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, sc);
131 }
132 
133 /*
134  * ECC fault handler.
135  */
136 static int
137 ecc_plb_intr(void *arg)
138 {
139 	struct ecc_plb_softc *sc = arg;
140 	u_int32_t		esr, ear;
141 	int			ue;
142 	u_quad_t		tb;
143 	u_long			tmp, msr, dat;
144 
145 	/* This code needs to be improved to handle double-bit errors */
146 	/* in some intelligent fashion. */
147 
148 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
149 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
150 
151 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
152 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
153 
154 	/* Always clear the error to stop the intr ASAP. */
155 
156 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
157 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
158 
159 	if (esr == 0x00) {
160 		/* No current error.  Could happen due to intr. nesting */
161 		return(1);
162 	}
163 
164 	/*
165 	 * Only report errors every once per second max. Do this using the TB,
166 	 * because the system time (via microtime) may be adjusted when the
167 	 * date is set and can't reliably be used to measure intervals.
168 	 */
169 
170 	__asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
171 		: "=r"(tb), "=r"(tmp));
172 	sc->sc_ecc_cnt++;
173 
174 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
175 		return(1);
176 
177 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
178 
179 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
180 		"BLCE=%d%d%d%d CBE=%d%d.\n",
181 		sc->sc_ecc_cnt, esr, ear,
182 		(ue) ? "Uncorrectable" : "Correctable",
183 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
184 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
185 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
186 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
187 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
188 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
189 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
190 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
191 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
192 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
193 
194 	/* Should check for uncorrectable errors and panic... */
195 
196 	if (sc->sc_ecc_cnt > 1000) {
197 		printf("ECC: Too many errors, recycling entire "
198 			"SDRAM (size = %d).\n", sc->sc_memsize);
199 
200 		/*
201 		 * Can this code be changed to run without disabling data MMU
202 		 * and disabling intrs?
203 		 * Does kernel always map all of physical RAM VA=PA? If so,
204 		 * just loop over lowmem.
205 		 */
206 		__asm volatile(
207 			"mfmsr 	%0;"
208 			"li	%1, 0x00;"
209 			"ori	%1, %1, 0x8010;"
210 			"andc	%1, %0, %1;"
211 			"mtmsr	%1;"
212 			"sync;isync;"
213 			"li	%1, 0x00;"
214 			"1:"
215 			"dcbt	0, %1;"
216 			"sync;isync;"
217 			"lwz	%2, 0(%1);"
218 			"stw	%2, 0(%1);"
219 			"sync;isync;"
220 			"dcbf	0, %1;"
221 			"sync;isync;"
222 			"addi	%1, %1, 0x20;"
223 			"addic.	%3, %3, -0x20;"
224 			"bge 	1b;"
225 			"mtmsr %0;"
226 			"sync;isync;"
227 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
228 		: "r" (sc->sc_memsize) : "0" );
229 
230 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
231 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
232 
233 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
234 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
235 
236 		/*
237 		 * Correctable errors here are OK, mem should be clean now.
238 		 *
239 		 * Should check for uncorrectable errors and panic...
240 		 */
241 		printf("ECC: Recycling complete, ESR=%x. "
242 			"Checking for persistent errors.\n", esr);
243 
244 		__asm volatile(
245 			"mfmsr 	%0;"
246 			"li	%1, 0x00;"
247 			"ori	%1, %1, 0x8010;"
248 			"andc	%1, %0, %1;"
249 			"mtmsr	%1;"
250 			"sync;isync;"
251 			"li	%1, 0x00;"
252 			"1:"
253 			"dcbt	0, %1;"
254 			"sync;isync;"
255 			"lwz	%2, 0(%1);"
256 			"stw	%2, 0(%1);"
257 			"sync;isync;"
258 			"dcbf	0, %1;"
259 			"sync;isync;"
260 			"addi	%1, %1, 0x20;"
261 			"addic.	%3, %3, -0x20;"
262 			"bge 	1b;"
263 			"mtmsr %0;"
264 			"sync;isync;"
265 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
266 		: "r" (sc->sc_memsize) : "0" );
267 
268 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
269 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
270 
271 		/*
272 		 * If esr is non zero here, we're screwed.
273 		 * Should check this and panic.
274 		 */
275 		printf("ECC: Persistent error check complete, "
276 			"final ESR=%x.\n", esr);
277 	}
278 
279 	sc->sc_ecc_tb = tb;
280 	sc->sc_ecc_cnt = 0;
281 
282 	return(1);
283 }
284