1 /* $NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $ */
2
3 /*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $");
40
41 #include "locators.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/device.h>
46 #include <sys/cpu.h>
47
48 #include <prop/proplib.h>
49
50 #include <powerpc/ibm4xx/cpu.h>
51 #include <powerpc/ibm4xx/dcr4xx.h>
52 #include <powerpc/ibm4xx/dev/plbvar.h>
53
54
55 struct ecc_plb_softc {
56 device_t sc_dev;
57 uint64_t sc_ecc_tb;
58 uint64_t sc_ecc_iv; /* Interval */
59 uint32_t sc_ecc_cnt;
60 u_int sc_memsize;
61 int sc_irq;
62 };
63
64 static int ecc_plbmatch(device_t, cfdata_t, void *);
65 static void ecc_plbattach(device_t, device_t, void *);
66 static void ecc_plb_deferred(device_t);
67 static int ecc_plb_intr(void *);
68
69 CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc),
70 ecc_plbmatch, ecc_plbattach, NULL, NULL);
71
72 static int ecc_plb_found;
73
74 static int
ecc_plbmatch(device_t parent,cfdata_t cf,void * aux)75 ecc_plbmatch(device_t parent, cfdata_t cf, void *aux)
76 {
77 struct plb_attach_args *paa = aux;
78
79 if (strcmp(paa->plb_name, cf->cf_name) != 0)
80 return (0);
81
82 if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
83 panic("ecc_plbmatch: wildcard IRQ not allowed");
84
85 paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
86
87 return (!ecc_plb_found);
88 }
89
90 static void
ecc_plbattach(device_t parent,device_t self,void * aux)91 ecc_plbattach(device_t parent, device_t self, void *aux)
92 {
93 struct ecc_plb_softc *sc = device_private(self);
94 struct plb_attach_args *paa = aux;
95 unsigned int processor_freq;
96 unsigned int memsiz;
97 prop_number_t pn;
98
99 ecc_plb_found++;
100
101 pn = prop_dictionary_get(board_properties, "processor-frequency");
102 KASSERT(pn != NULL);
103 processor_freq = (unsigned int) prop_number_integer_value(pn);
104
105 pn = prop_dictionary_get(board_properties, "mem-size");
106 KASSERT(pn != NULL);
107 memsiz = (unsigned int) prop_number_integer_value(pn);
108
109 aprint_normal(": ECC controller\n");
110
111 sc->sc_dev = self;
112 sc->sc_ecc_tb = 0;
113 sc->sc_ecc_cnt = 0;
114 sc->sc_ecc_iv = processor_freq; /* Set interval */
115 sc->sc_memsize = memsiz;
116 sc->sc_irq = paa->plb_irq;
117
118 /*
119 * Defer hooking the interrupt until all PLB devices have attached
120 * since the interrupt controller may well be one of those devices...
121 */
122 config_defer(self, ecc_plb_deferred);
123 }
124
125 static void
ecc_plb_deferred(device_t self)126 ecc_plb_deferred(device_t self)
127 {
128 struct ecc_plb_softc *sc = device_private(self);
129
130 intr_establish_xname(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr,
131 sc, device_xname(self));
132 }
133
134 /*
135 * ECC fault handler.
136 */
137 static int
ecc_plb_intr(void * arg)138 ecc_plb_intr(void *arg)
139 {
140 struct ecc_plb_softc *sc = arg;
141 u_int32_t esr, ear;
142 int ue;
143 u_quad_t tb;
144 u_long tmp, msr, dat;
145
146 /* This code needs to be improved to handle double-bit errors */
147 /* in some intelligent fashion. */
148
149 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
150 esr = mfdcr(DCR_SDRAM0_CFGDATA);
151
152 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
153 ear = mfdcr(DCR_SDRAM0_CFGDATA);
154
155 /* Always clear the error to stop the intr ASAP. */
156
157 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
158 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
159
160 if (esr == 0x00) {
161 /* No current error. Could happen due to intr. nesting */
162 return(1);
163 }
164
165 /*
166 * Only report errors every once per second max. Do this using the TB,
167 * because the system time (via microtime) may be adjusted when the
168 * date is set and can't reliably be used to measure intervals.
169 */
170
171 __asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
172 : "=r"(tb), "=r"(tmp));
173 sc->sc_ecc_cnt++;
174
175 if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
176 return(1);
177
178 ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
179
180 printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
181 "BLCE=%d%d%d%d CBE=%d%d.\n",
182 sc->sc_ecc_cnt, esr, ear,
183 (ue) ? "Uncorrectable" : "Correctable",
184 ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
185 ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
186 ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
187 ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
188 ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
189 ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
190 ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
191 ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
192 ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
193 ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
194
195 /* Should check for uncorrectable errors and panic... */
196
197 if (sc->sc_ecc_cnt > 1000) {
198 printf("ECC: Too many errors, recycling entire "
199 "SDRAM (size = %d).\n", sc->sc_memsize);
200
201 /*
202 * Can this code be changed to run without disabling data MMU
203 * and disabling intrs?
204 * Does kernel always map all of physical RAM VA=PA? If so,
205 * just loop over lowmem.
206 */
207 __asm volatile(
208 "mfmsr %0;"
209 "li %1, 0x00;"
210 "ori %1, %1, 0x8010;"
211 "andc %1, %0, %1;"
212 "mtmsr %1;"
213 "sync;isync;"
214 "li %1, 0x00;"
215 "1:"
216 "dcbt 0, %1;"
217 "sync;isync;"
218 "lwz %2, 0(%1);"
219 "stw %2, 0(%1);"
220 "sync;isync;"
221 "dcbf 0, %1;"
222 "sync;isync;"
223 "addi %1, %1, 0x20;"
224 "addic. %3, %3, -0x20;"
225 "bge 1b;"
226 "mtmsr %0;"
227 "sync;isync;"
228 : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
229 : "r" (sc->sc_memsize) : "0" );
230
231 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
232 esr = mfdcr(DCR_SDRAM0_CFGDATA);
233
234 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
235 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
236
237 /*
238 * Correctable errors here are OK, mem should be clean now.
239 *
240 * Should check for uncorrectable errors and panic...
241 */
242 printf("ECC: Recycling complete, ESR=%x. "
243 "Checking for persistent errors.\n", esr);
244
245 __asm volatile(
246 "mfmsr %0;"
247 "li %1, 0x00;"
248 "ori %1, %1, 0x8010;"
249 "andc %1, %0, %1;"
250 "mtmsr %1;"
251 "sync;isync;"
252 "li %1, 0x00;"
253 "1:"
254 "dcbt 0, %1;"
255 "sync;isync;"
256 "lwz %2, 0(%1);"
257 "stw %2, 0(%1);"
258 "sync;isync;"
259 "dcbf 0, %1;"
260 "sync;isync;"
261 "addi %1, %1, 0x20;"
262 "addic. %3, %3, -0x20;"
263 "bge 1b;"
264 "mtmsr %0;"
265 "sync;isync;"
266 : "=&r" (msr), "=&r" (tmp), "=&r" (dat)
267 : "r" (sc->sc_memsize) : "0" );
268
269 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
270 esr = mfdcr(DCR_SDRAM0_CFGDATA);
271
272 /*
273 * If esr is non zero here, we're screwed.
274 * Should check this and panic.
275 */
276 printf("ECC: Persistent error check complete, "
277 "final ESR=%x.\n", esr);
278 }
279
280 sc->sc_ecc_tb = tb;
281 sc->sc_ecc_cnt = 0;
282
283 return(1);
284 }
285