1 /* $NetBSD: ecc_plb.c,v 1.8 2003/07/15 02:54:44 lukem Exp $ */ 2 3 /* 4 * Copyright 2001 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.8 2003/07/15 02:54:44 lukem Exp $"); 40 41 #include "locators.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/device.h> 46 #include <sys/properties.h> 47 48 #include <machine/cpu.h> 49 #include <powerpc/ibm4xx/dcr405gp.h> 50 #include <powerpc/ibm4xx/dev/plbvar.h> 51 52 53 struct ecc_plb_softc { 54 struct device sc_dev; 55 u_quad_t sc_ecc_tb; 56 u_quad_t sc_ecc_iv; /* Interval */ 57 u_int32_t sc_ecc_cnt; 58 u_int sc_memsize; 59 int sc_irq; 60 }; 61 62 static int ecc_plbmatch(struct device *, struct cfdata *, void *); 63 static void ecc_plbattach(struct device *, struct device *, void *); 64 static void ecc_plb_deferred(struct device *); 65 static int ecc_plb_intr(void *); 66 67 CFATTACH_DECL(ecc_plb, sizeof(struct ecc_plb_softc), 68 ecc_plbmatch, ecc_plbattach, NULL, NULL); 69 70 static int ecc_plb_found; 71 72 static int 73 ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux) 74 { 75 struct plb_attach_args *paa = aux; 76 77 if (strcmp(paa->plb_name, cf->cf_name) != 0) 78 return (0); 79 80 if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT) 81 panic("ecc_plbmatch: wildcard IRQ not allowed"); 82 83 paa->plb_irq = cf->cf_loc[PLBCF_IRQ]; 84 85 return (!ecc_plb_found); 86 } 87 88 static void 89 ecc_plbattach(struct device *parent, struct device *self, void *aux) 90 { 91 struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self; 92 struct plb_attach_args *paa = aux; 93 unsigned int processor_freq; 94 unsigned int memsiz; 95 96 ecc_plb_found++; 97 98 if (board_info_get("processor-frequency", 99 &processor_freq, sizeof(processor_freq)) == -1) 100 panic("no processor-frequency"); 101 102 if (board_info_get("mem-size", &memsiz, sizeof(memsiz)) == -1) 103 panic("no mem-size"); 104 105 printf(": ECC controller\n"); 106 107 sc->sc_ecc_tb = 0; 108 sc->sc_ecc_cnt = 0; 109 sc->sc_ecc_iv = processor_freq; /* Set interval */ 110 sc->sc_memsize = memsiz; 111 sc->sc_irq = paa->plb_irq; 112 113 /* 114 * Defer hooking the interrupt until all PLB devices have attached 115 * since the interrupt controller may well be one of those devices... 116 */ 117 config_defer(self, ecc_plb_deferred); 118 } 119 120 static void 121 ecc_plb_deferred(struct device *self) 122 { 123 struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self; 124 125 intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL); 126 } 127 128 /* 129 * ECC fault handler. 130 */ 131 static int 132 ecc_plb_intr(void *arg) 133 { 134 struct ecc_plb_softc *sc = arg; 135 u_int32_t esr, ear; 136 int ce, ue; 137 u_quad_t tb; 138 u_long tmp, msr, dat; 139 140 /* This code needs to be improved to handle double-bit errors */ 141 /* in some intelligent fashion. */ 142 143 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 144 esr = mfdcr(DCR_SDRAM0_CFGDATA); 145 146 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR); 147 ear = mfdcr(DCR_SDRAM0_CFGDATA); 148 149 /* Always clear the error to stop the intr ASAP. */ 150 151 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 152 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 153 154 if (esr == 0x00) { 155 /* No current error. Could happen due to intr. nesting */ 156 return(1); 157 } 158 159 /* 160 * Only report errors every once per second max. Do this using the TB, 161 * because the system time (via microtime) may be adjusted when the 162 * date is set and can't reliably be used to measure intervals. 163 */ 164 165 asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b" 166 : "=r"(tb), "=r"(tmp)); 167 sc->sc_ecc_cnt++; 168 169 if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv) 170 return(1); 171 172 ce = (esr & SDRAM0_ECCESR_CE) != 0x00; 173 ue = (esr & SDRAM0_ECCESR_UE) != 0x00; 174 175 printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d " 176 "BLCE=%d%d%d%d CBE=%d%d.\n", 177 sc->sc_ecc_cnt, esr, ear, 178 (ue) ? "Uncorrectable" : "Correctable", 179 ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00), 180 ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00), 181 ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00), 182 ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00), 183 ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00), 184 ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00), 185 ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00), 186 ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00), 187 ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00), 188 ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00)); 189 190 /* Should check for uncorrectable errors and panic... */ 191 192 if (sc->sc_ecc_cnt > 1000) { 193 printf("ECC: Too many errors, recycling entire " 194 "SDRAM (size = %d).\n", sc->sc_memsize); 195 196 /* 197 * Can this code be changed to run without disabling data MMU 198 * and disabling intrs? 199 * Does kernel always map all of physical RAM VA=PA? If so, 200 * just loop over lowmem. 201 */ 202 asm volatile( 203 "mfmsr %0;" 204 "li %1, 0x00;" 205 "ori %1, %1, 0x8010;" 206 "andc %1, %0, %1;" 207 "mtmsr %1;" 208 "sync;isync;" 209 "li %1, 0x00;" 210 "1:" 211 "dcbt 0, %1;" 212 "sync;isync;" 213 "lwz %2, 0(%1);" 214 "stw %2, 0(%1);" 215 "sync;isync;" 216 "dcbf 0, %1;" 217 "sync;isync;" 218 "addi %1, %1, 0x20;" 219 "addic. %3, %3, -0x20;" 220 "bge 1b;" 221 "mtmsr %0;" 222 "sync;isync;" 223 : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 224 : "r" (sc->sc_memsize) : "0" ); 225 226 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 227 esr = mfdcr(DCR_SDRAM0_CFGDATA); 228 229 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 230 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 231 232 /* 233 * Correctable errors here are OK, mem should be clean now. 234 * 235 * Should check for uncorrectable errors and panic... 236 */ 237 printf("ECC: Recycling complete, ESR=%x. " 238 "Checking for persistent errors.\n", esr); 239 240 asm volatile( 241 "mfmsr %0;" 242 "li %1, 0x00;" 243 "ori %1, %1, 0x8010;" 244 "andc %1, %0, %1;" 245 "mtmsr %1;" 246 "sync;isync;" 247 "li %1, 0x00;" 248 "1:" 249 "dcbt 0, %1;" 250 "sync;isync;" 251 "lwz %2, 0(%1);" 252 "stw %2, 0(%1);" 253 "sync;isync;" 254 "dcbf 0, %1;" 255 "sync;isync;" 256 "addi %1, %1, 0x20;" 257 "addic. %3, %3, -0x20;" 258 "bge 1b;" 259 "mtmsr %0;" 260 "sync;isync;" 261 : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 262 : "r" (sc->sc_memsize) : "0" ); 263 264 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 265 esr = mfdcr(DCR_SDRAM0_CFGDATA); 266 267 /* 268 * If esr is non zero here, we're screwed. 269 * Should check this and panic. 270 */ 271 printf("ECC: Persistent error check complete, " 272 "final ESR=%x.\n", esr); 273 } 274 275 sc->sc_ecc_tb = tb; 276 sc->sc_ecc_cnt = 0; 277 278 return(1); 279 } 280