xref: /netbsd-src/sys/arch/powerpc/ibm4xx/dev/ecc_plb.c (revision 0ece553e21bb21e22506ec4b07fc6913593f1fa5)
1*0ece553eSrin /*	$NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $	*/
25b79fdfaSscw 
35b79fdfaSscw /*
45b79fdfaSscw  * Copyright 2001 Wasabi Systems, Inc.
55b79fdfaSscw  * All rights reserved.
65b79fdfaSscw  *
75b79fdfaSscw  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
85b79fdfaSscw  *
95b79fdfaSscw  * Redistribution and use in source and binary forms, with or without
105b79fdfaSscw  * modification, are permitted provided that the following conditions
115b79fdfaSscw  * are met:
125b79fdfaSscw  * 1. Redistributions of source code must retain the above copyright
135b79fdfaSscw  *    notice, this list of conditions and the following disclaimer.
145b79fdfaSscw  * 2. Redistributions in binary form must reproduce the above copyright
155b79fdfaSscw  *    notice, this list of conditions and the following disclaimer in the
165b79fdfaSscw  *    documentation and/or other materials provided with the distribution.
175b79fdfaSscw  * 3. All advertising materials mentioning features or use of this software
185b79fdfaSscw  *    must display the following acknowledgement:
195b79fdfaSscw  *      This product includes software developed for the NetBSD Project by
205b79fdfaSscw  *      Wasabi Systems, Inc.
215b79fdfaSscw  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
225b79fdfaSscw  *    or promote products derived from this software without specific prior
235b79fdfaSscw  *    written permission.
245b79fdfaSscw  *
255b79fdfaSscw  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
265b79fdfaSscw  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
275b79fdfaSscw  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
285b79fdfaSscw  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
295b79fdfaSscw  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
305b79fdfaSscw  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
315b79fdfaSscw  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
325b79fdfaSscw  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
335b79fdfaSscw  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
345b79fdfaSscw  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
355b79fdfaSscw  * POSSIBILITY OF SUCH DAMAGE.
365b79fdfaSscw  */
375b79fdfaSscw 
38ed517291Slukem #include <sys/cdefs.h>
39*0ece553eSrin __KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.16 2021/02/27 20:43:58 rin Exp $");
40ed517291Slukem 
415b79fdfaSscw #include "locators.h"
425b79fdfaSscw 
435b79fdfaSscw #include <sys/param.h>
445b79fdfaSscw #include <sys/systm.h>
455b79fdfaSscw #include <sys/device.h>
461fd2c684Smatt #include <sys/cpu.h>
47fb44a857Sthorpej 
48fb44a857Sthorpej #include <prop/proplib.h>
495b79fdfaSscw 
501fd2c684Smatt #include <powerpc/ibm4xx/cpu.h>
512692e2e2Skiyohara #include <powerpc/ibm4xx/dcr4xx.h>
525b79fdfaSscw #include <powerpc/ibm4xx/dev/plbvar.h>
535b79fdfaSscw 
545b79fdfaSscw 
555b79fdfaSscw struct ecc_plb_softc {
56036ca983Smatt 	device_t sc_dev;
57036ca983Smatt 	uint64_t sc_ecc_tb;
58036ca983Smatt 	uint64_t sc_ecc_iv;	 /* Interval */
59036ca983Smatt 	uint32_t sc_ecc_cnt;
605b79fdfaSscw 	u_int sc_memsize;
615b79fdfaSscw 	int sc_irq;
625b79fdfaSscw };
635b79fdfaSscw 
64036ca983Smatt static int	ecc_plbmatch(device_t, cfdata_t, void *);
65036ca983Smatt static void	ecc_plbattach(device_t, device_t, void *);
66036ca983Smatt static void	ecc_plb_deferred(device_t);
675b79fdfaSscw static int	ecc_plb_intr(void *);
685b79fdfaSscw 
69036ca983Smatt CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc),
7089bf5a8fSthorpej     ecc_plbmatch, ecc_plbattach, NULL, NULL);
715b79fdfaSscw 
725b79fdfaSscw static int ecc_plb_found;
735b79fdfaSscw 
745b79fdfaSscw static int
ecc_plbmatch(device_t parent,cfdata_t cf,void * aux)75036ca983Smatt ecc_plbmatch(device_t parent, cfdata_t cf, void *aux)
765b79fdfaSscw {
775b79fdfaSscw 	struct plb_attach_args *paa = aux;
785b79fdfaSscw 
79d1ad2ac4Sthorpej 	if (strcmp(paa->plb_name, cf->cf_name) != 0)
805b79fdfaSscw 		return (0);
815b79fdfaSscw 
825b79fdfaSscw 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
830f09ed48Sprovos 		panic("ecc_plbmatch: wildcard IRQ not allowed");
845b79fdfaSscw 
855b79fdfaSscw 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
865b79fdfaSscw 
875b79fdfaSscw 	return (!ecc_plb_found);
885b79fdfaSscw }
895b79fdfaSscw 
905b79fdfaSscw static void
ecc_plbattach(device_t parent,device_t self,void * aux)91036ca983Smatt ecc_plbattach(device_t parent, device_t self, void *aux)
925b79fdfaSscw {
93036ca983Smatt 	struct ecc_plb_softc *sc = device_private(self);
945b79fdfaSscw 	struct plb_attach_args *paa = aux;
955b79fdfaSscw 	unsigned int processor_freq;
965b79fdfaSscw 	unsigned int memsiz;
97fb44a857Sthorpej 	prop_number_t pn;
985b79fdfaSscw 
995b79fdfaSscw 	ecc_plb_found++;
1005b79fdfaSscw 
101fb44a857Sthorpej 	pn = prop_dictionary_get(board_properties, "processor-frequency");
102fb44a857Sthorpej 	KASSERT(pn != NULL);
103fb44a857Sthorpej 	processor_freq = (unsigned int) prop_number_integer_value(pn);
1045b79fdfaSscw 
105fb44a857Sthorpej 	pn = prop_dictionary_get(board_properties, "mem-size");
106fb44a857Sthorpej 	KASSERT(pn != NULL);
107fb44a857Sthorpej 	memsiz = (unsigned int) prop_number_integer_value(pn);
1085b79fdfaSscw 
109036ca983Smatt 	aprint_normal(": ECC controller\n");
1105b79fdfaSscw 
111036ca983Smatt 	sc->sc_dev = self;
1125b79fdfaSscw 	sc->sc_ecc_tb = 0;
1135b79fdfaSscw 	sc->sc_ecc_cnt = 0;
1145b79fdfaSscw 	sc->sc_ecc_iv = processor_freq; /* Set interval */
1155b79fdfaSscw 	sc->sc_memsize = memsiz;
1165b79fdfaSscw 	sc->sc_irq = paa->plb_irq;
1175b79fdfaSscw 
1185b79fdfaSscw 	/*
1195b79fdfaSscw 	 * Defer hooking the interrupt until all PLB devices have attached
1205b79fdfaSscw 	 * since the interrupt controller may well be one of those devices...
1215b79fdfaSscw 	 */
1225b79fdfaSscw 	config_defer(self, ecc_plb_deferred);
1235b79fdfaSscw }
1245b79fdfaSscw 
1255b79fdfaSscw static void
ecc_plb_deferred(device_t self)126036ca983Smatt ecc_plb_deferred(device_t self)
1275b79fdfaSscw {
128036ca983Smatt 	struct ecc_plb_softc *sc = device_private(self);
1295b79fdfaSscw 
130*0ece553eSrin 	intr_establish_xname(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr,
131*0ece553eSrin 	    sc, device_xname(self));
1325b79fdfaSscw }
1335b79fdfaSscw 
1345b79fdfaSscw /*
1355b79fdfaSscw  * ECC fault handler.
1365b79fdfaSscw  */
1375b79fdfaSscw static int
ecc_plb_intr(void * arg)1385b79fdfaSscw ecc_plb_intr(void *arg)
1395b79fdfaSscw {
1405b79fdfaSscw 	struct ecc_plb_softc *sc = arg;
1415b79fdfaSscw 	u_int32_t		esr, ear;
142c4a0cfafSmartin 	int			ue;
1435b79fdfaSscw 	u_quad_t		tb;
1445b79fdfaSscw 	u_long			tmp, msr, dat;
1455b79fdfaSscw 
1465b79fdfaSscw 	/* This code needs to be improved to handle double-bit errors */
1475b79fdfaSscw 	/* in some intelligent fashion. */
1485b79fdfaSscw 
1495b79fdfaSscw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
1505b79fdfaSscw 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
1515b79fdfaSscw 
1525b79fdfaSscw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
1535b79fdfaSscw 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
1545b79fdfaSscw 
1555b79fdfaSscw 	/* Always clear the error to stop the intr ASAP. */
1565b79fdfaSscw 
1575b79fdfaSscw 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
1585b79fdfaSscw 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
1595b79fdfaSscw 
1605b79fdfaSscw 	if (esr == 0x00) {
1615b79fdfaSscw 		/* No current error.  Could happen due to intr. nesting */
1625b79fdfaSscw 		return(1);
1635b79fdfaSscw 	}
1645b79fdfaSscw 
1655b79fdfaSscw 	/*
1665b79fdfaSscw 	 * Only report errors every once per second max. Do this using the TB,
1675b79fdfaSscw 	 * because the system time (via microtime) may be adjusted when the
1685b79fdfaSscw 	 * date is set and can't reliably be used to measure intervals.
1695b79fdfaSscw 	 */
1705b79fdfaSscw 
1712d65de24Sperry 	__asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
1725b79fdfaSscw 		: "=r"(tb), "=r"(tmp));
1735b79fdfaSscw 	sc->sc_ecc_cnt++;
1745b79fdfaSscw 
1755b79fdfaSscw 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
1765b79fdfaSscw 		return(1);
1775b79fdfaSscw 
1785b79fdfaSscw 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
1795b79fdfaSscw 
1805b79fdfaSscw 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
1815b79fdfaSscw 		"BLCE=%d%d%d%d CBE=%d%d.\n",
1825b79fdfaSscw 		sc->sc_ecc_cnt, esr, ear,
1835b79fdfaSscw 		(ue) ? "Uncorrectable" : "Correctable",
1845b79fdfaSscw 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
1855b79fdfaSscw 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
1865b79fdfaSscw 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
1875b79fdfaSscw 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
1885b79fdfaSscw 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
1895b79fdfaSscw 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
1905b79fdfaSscw 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
1915b79fdfaSscw 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
1925b79fdfaSscw 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
1935b79fdfaSscw 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
1945b79fdfaSscw 
1955b79fdfaSscw 	/* Should check for uncorrectable errors and panic... */
1965b79fdfaSscw 
1975b79fdfaSscw 	if (sc->sc_ecc_cnt > 1000) {
1985b79fdfaSscw 		printf("ECC: Too many errors, recycling entire "
1995b79fdfaSscw 			"SDRAM (size = %d).\n", sc->sc_memsize);
2005b79fdfaSscw 
2015b79fdfaSscw 		/*
2025b79fdfaSscw 		 * Can this code be changed to run without disabling data MMU
2035b79fdfaSscw 		 * and disabling intrs?
2045b79fdfaSscw 		 * Does kernel always map all of physical RAM VA=PA? If so,
2055b79fdfaSscw 		 * just loop over lowmem.
2065b79fdfaSscw 		 */
2072d65de24Sperry 		__asm volatile(
2085b79fdfaSscw 			"mfmsr 	%0;"
2095b79fdfaSscw 			"li	%1, 0x00;"
2105b79fdfaSscw 			"ori	%1, %1, 0x8010;"
2115b79fdfaSscw 			"andc	%1, %0, %1;"
2125b79fdfaSscw 			"mtmsr	%1;"
2135b79fdfaSscw 			"sync;isync;"
2145b79fdfaSscw 			"li	%1, 0x00;"
2155b79fdfaSscw 			"1:"
2165b79fdfaSscw 			"dcbt	0, %1;"
2175b79fdfaSscw 			"sync;isync;"
2185b79fdfaSscw 			"lwz	%2, 0(%1);"
2195b79fdfaSscw 			"stw	%2, 0(%1);"
2205b79fdfaSscw 			"sync;isync;"
2215b79fdfaSscw 			"dcbf	0, %1;"
2225b79fdfaSscw 			"sync;isync;"
2235b79fdfaSscw 			"addi	%1, %1, 0x20;"
2245b79fdfaSscw 			"addic.	%3, %3, -0x20;"
2255b79fdfaSscw 			"bge 	1b;"
2265b79fdfaSscw 			"mtmsr %0;"
2275b79fdfaSscw 			"sync;isync;"
2285b79fdfaSscw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
2295b79fdfaSscw 		: "r" (sc->sc_memsize) : "0" );
2305b79fdfaSscw 
2315b79fdfaSscw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
2325b79fdfaSscw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
2335b79fdfaSscw 
2345b79fdfaSscw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
2355b79fdfaSscw 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
2365b79fdfaSscw 
2375b79fdfaSscw 		/*
2385b79fdfaSscw 		 * Correctable errors here are OK, mem should be clean now.
2395b79fdfaSscw 		 *
2405b79fdfaSscw 		 * Should check for uncorrectable errors and panic...
2415b79fdfaSscw 		 */
2425b79fdfaSscw 		printf("ECC: Recycling complete, ESR=%x. "
2435b79fdfaSscw 			"Checking for persistent errors.\n", esr);
2445b79fdfaSscw 
2452d65de24Sperry 		__asm volatile(
2465b79fdfaSscw 			"mfmsr 	%0;"
2475b79fdfaSscw 			"li	%1, 0x00;"
2485b79fdfaSscw 			"ori	%1, %1, 0x8010;"
2495b79fdfaSscw 			"andc	%1, %0, %1;"
2505b79fdfaSscw 			"mtmsr	%1;"
2515b79fdfaSscw 			"sync;isync;"
2525b79fdfaSscw 			"li	%1, 0x00;"
2535b79fdfaSscw 			"1:"
2545b79fdfaSscw 			"dcbt	0, %1;"
2555b79fdfaSscw 			"sync;isync;"
2565b79fdfaSscw 			"lwz	%2, 0(%1);"
2575b79fdfaSscw 			"stw	%2, 0(%1);"
2585b79fdfaSscw 			"sync;isync;"
2595b79fdfaSscw 			"dcbf	0, %1;"
2605b79fdfaSscw 			"sync;isync;"
2615b79fdfaSscw 			"addi	%1, %1, 0x20;"
2625b79fdfaSscw 			"addic.	%3, %3, -0x20;"
2635b79fdfaSscw 			"bge 	1b;"
2645b79fdfaSscw 			"mtmsr %0;"
2655b79fdfaSscw 			"sync;isync;"
2665b79fdfaSscw 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
2675b79fdfaSscw 		: "r" (sc->sc_memsize) : "0" );
2685b79fdfaSscw 
2695b79fdfaSscw 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
2705b79fdfaSscw 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
2715b79fdfaSscw 
2725b79fdfaSscw 		/*
2735b79fdfaSscw 		 * If esr is non zero here, we're screwed.
2745b79fdfaSscw 		 * Should check this and panic.
2755b79fdfaSscw 		 */
2765b79fdfaSscw 		printf("ECC: Persistent error check complete, "
2775b79fdfaSscw 			"final ESR=%x.\n", esr);
2785b79fdfaSscw 	}
2795b79fdfaSscw 
2805b79fdfaSscw 	sc->sc_ecc_tb = tb;
2815b79fdfaSscw 	sc->sc_ecc_cnt = 0;
2825b79fdfaSscw 
2835b79fdfaSscw 	return(1);
2845b79fdfaSscw }
285