xref: /onnv-gate/usr/src/uts/sun4u/io/pci/pci_ecc.c (revision 4039:8a5733b96af9)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*4039Skd93003  * Common Development and Distribution License (the "License").
6*4039Skd93003  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*4039Skd93003  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * PCI ECC support
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/types.h>
330Sstevel@tonic-gate #include <sys/systm.h>		/* for strrchr */
340Sstevel@tonic-gate #include <sys/kmem.h>
350Sstevel@tonic-gate #include <sys/sunddi.h>
360Sstevel@tonic-gate #include <sys/intr.h>
370Sstevel@tonic-gate #include <sys/async.h>		/* struct async_flt */
380Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
390Sstevel@tonic-gate #include <sys/machsystm.h>
400Sstevel@tonic-gate #include <sys/sysmacros.h>
410Sstevel@tonic-gate #include <sys/fm/protocol.h>
420Sstevel@tonic-gate #include <sys/fm/util.h>
430Sstevel@tonic-gate #include <sys/fm/io/pci.h>
440Sstevel@tonic-gate #include <sys/fm/io/sun4upci.h>
450Sstevel@tonic-gate #include <sys/fm/io/ddi.h>
460Sstevel@tonic-gate #include <sys/pci/pci_obj.h>	/* ld/st physio */
470Sstevel@tonic-gate #include <sys/cpuvar.h>
480Sstevel@tonic-gate #include <sys/errclassify.h>
491186Sayznaga #include <sys/cpu_module.h>
501186Sayznaga #include <sys/async.h>
510Sstevel@tonic-gate 
520Sstevel@tonic-gate /*LINTLIBRARY*/
530Sstevel@tonic-gate 
540Sstevel@tonic-gate static void ecc_disable(ecc_t *, int);
550Sstevel@tonic-gate static void ecc_delayed_ce(void *);
560Sstevel@tonic-gate static uint64_t ecc_read_afsr(ecc_intr_info_t *);
570Sstevel@tonic-gate static void ecc_ereport_post(dev_info_t *dip, ecc_errstate_t *ecc_err);
580Sstevel@tonic-gate 
590Sstevel@tonic-gate clock_t pci_ecc_panic_delay = 200;
600Sstevel@tonic-gate int ecc_ce_delay_secs = 6;	/* number of sec to delay reenabling of CEs */
610Sstevel@tonic-gate int ecc_ce_delayed = 1;		/* global for enabling/disabling CE delay */
620Sstevel@tonic-gate 
630Sstevel@tonic-gate void
640Sstevel@tonic-gate ecc_create(pci_t *pci_p)
650Sstevel@tonic-gate {
660Sstevel@tonic-gate #ifdef DEBUG
670Sstevel@tonic-gate 	dev_info_t *dip = pci_p->pci_dip;
680Sstevel@tonic-gate #endif
690Sstevel@tonic-gate 	uint64_t cb_base_pa = pci_p->pci_cb_p->cb_base_pa;
700Sstevel@tonic-gate 	ecc_t *ecc_p;
710Sstevel@tonic-gate 
720Sstevel@tonic-gate 	ecc_p = (ecc_t *)kmem_zalloc(sizeof (ecc_t), KM_SLEEP);
730Sstevel@tonic-gate 	ecc_p->ecc_pci_cmn_p = pci_p->pci_common_p;
740Sstevel@tonic-gate 	pci_p->pci_ecc_p = ecc_p;
750Sstevel@tonic-gate 
760Sstevel@tonic-gate 	ecc_p->ecc_ue.ecc_p = ecc_p;
770Sstevel@tonic-gate 	ecc_p->ecc_ue.ecc_type = CBNINTR_UE;
780Sstevel@tonic-gate 	ecc_p->ecc_ce.ecc_p = ecc_p;
790Sstevel@tonic-gate 	ecc_p->ecc_ce.ecc_type = CBNINTR_CE;
800Sstevel@tonic-gate 
810Sstevel@tonic-gate 	pci_ecc_setup(ecc_p);
820Sstevel@tonic-gate 
830Sstevel@tonic-gate 	/*
840Sstevel@tonic-gate 	 * Determine the virtual addresses of the streaming cache
850Sstevel@tonic-gate 	 * control/status and flush registers.
860Sstevel@tonic-gate 	 */
870Sstevel@tonic-gate 	ecc_p->ecc_csr_pa = cb_base_pa + COMMON_ECC_CSR_OFFSET;
880Sstevel@tonic-gate 	ecc_p->ecc_ue.ecc_afsr_pa = cb_base_pa + COMMON_UE_AFSR_OFFSET;
890Sstevel@tonic-gate 	ecc_p->ecc_ue.ecc_afar_pa = cb_base_pa + COMMON_UE_AFAR_OFFSET;
900Sstevel@tonic-gate 	ecc_p->ecc_ce.ecc_afsr_pa = cb_base_pa + COMMON_CE_AFSR_OFFSET;
910Sstevel@tonic-gate 	ecc_p->ecc_ce.ecc_afar_pa = cb_base_pa + COMMON_CE_AFAR_OFFSET;
920Sstevel@tonic-gate 
930Sstevel@tonic-gate 	DEBUG1(DBG_ATTACH, dip, "ecc_create: csr=%x\n", ecc_p->ecc_csr_pa);
940Sstevel@tonic-gate 	DEBUG2(DBG_ATTACH, dip, "ecc_create: ue_afsr=%x, ue_afar=%x\n",
950Sstevel@tonic-gate 		ecc_p->ecc_ue.ecc_afsr_pa, ecc_p->ecc_ue.ecc_afar_pa);
960Sstevel@tonic-gate 	DEBUG2(DBG_ATTACH, dip, "ecc_create: ce_afsr=%x, ce_afar=%x\n",
970Sstevel@tonic-gate 		ecc_p->ecc_ce.ecc_afsr_pa, ecc_p->ecc_ce.ecc_afar_pa);
980Sstevel@tonic-gate 
990Sstevel@tonic-gate 	ecc_configure(pci_p);
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate 	/*
1020Sstevel@tonic-gate 	 * Register routines to be called from system error handling code.
1030Sstevel@tonic-gate 	 */
1040Sstevel@tonic-gate 	bus_func_register(BF_TYPE_ERRDIS, (busfunc_t)ecc_disable_nowait, ecc_p);
1050Sstevel@tonic-gate }
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate int
1080Sstevel@tonic-gate ecc_register_intr(pci_t *pci_p)
1090Sstevel@tonic-gate {
1100Sstevel@tonic-gate 	ecc_t *ecc_p = pci_p->pci_ecc_p;
1110Sstevel@tonic-gate 	int ret;
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate 	/*
1140Sstevel@tonic-gate 	 * Install the UE and CE error interrupt handlers.
1150Sstevel@tonic-gate 	 */
1160Sstevel@tonic-gate 	if ((ret = pci_ecc_add_intr(pci_p, CBNINTR_UE, &ecc_p->ecc_ue)) !=
1170Sstevel@tonic-gate 	    DDI_SUCCESS)
1180Sstevel@tonic-gate 		return (ret);
1190Sstevel@tonic-gate 	if ((ret = pci_ecc_add_intr(pci_p, CBNINTR_CE, &ecc_p->ecc_ce)) !=
1200Sstevel@tonic-gate 	    DDI_SUCCESS)
1210Sstevel@tonic-gate 		return (ret);
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate 	return (DDI_SUCCESS);
1240Sstevel@tonic-gate }
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate void
1270Sstevel@tonic-gate ecc_destroy(pci_t *pci_p)
1280Sstevel@tonic-gate {
1290Sstevel@tonic-gate 	ecc_t *ecc_p = pci_p->pci_ecc_p;
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate 	DEBUG0(DBG_DETACH, pci_p->pci_dip, "ecc_destroy:\n");
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate 	/*
1340Sstevel@tonic-gate 	 * Disable UE and CE ECC error interrupts.
1350Sstevel@tonic-gate 	 */
1360Sstevel@tonic-gate 	ecc_disable_wait(ecc_p);
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 	/*
1390Sstevel@tonic-gate 	 * Remove the ECC interrupt handlers.
1400Sstevel@tonic-gate 	 */
1410Sstevel@tonic-gate 	pci_ecc_rem_intr(pci_p, CBNINTR_UE, &ecc_p->ecc_ue);
1420Sstevel@tonic-gate 	pci_ecc_rem_intr(pci_p, CBNINTR_CE, &ecc_p->ecc_ce);
1430Sstevel@tonic-gate 
1440Sstevel@tonic-gate 	/*
1450Sstevel@tonic-gate 	 * Unregister our error handling functions.
1460Sstevel@tonic-gate 	 */
1470Sstevel@tonic-gate 	bus_func_unregister(BF_TYPE_ERRDIS,
1480Sstevel@tonic-gate 	    (busfunc_t)ecc_disable_nowait, ecc_p);
1490Sstevel@tonic-gate 	/*
1500Sstevel@tonic-gate 	 * If a timer has been set, unset it.
1510Sstevel@tonic-gate 	 */
1520Sstevel@tonic-gate 	(void) untimeout(ecc_p->ecc_to_id);
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	kmem_free(ecc_p, sizeof (ecc_t));
1550Sstevel@tonic-gate 	pci_p->pci_ecc_p = NULL;
1560Sstevel@tonic-gate }
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate void
1590Sstevel@tonic-gate ecc_configure(pci_t *pci_p)
1600Sstevel@tonic-gate {
1610Sstevel@tonic-gate 	ecc_t *ecc_p = pci_p->pci_ecc_p;
1620Sstevel@tonic-gate 	dev_info_t *dip = pci_p->pci_dip;
1630Sstevel@tonic-gate 	uint64_t l;
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	/*
1660Sstevel@tonic-gate 	 * Clear any pending ECC errors.
1670Sstevel@tonic-gate 	 */
1680Sstevel@tonic-gate 	DEBUG0(DBG_ATTACH, dip, "ecc_configure: clearing UE and CE errors\n");
1690Sstevel@tonic-gate 	l = (COMMON_ECC_UE_AFSR_E_MASK << COMMON_ECC_UE_AFSR_PE_SHIFT) |
1700Sstevel@tonic-gate 		(COMMON_ECC_UE_AFSR_E_MASK << COMMON_ECC_UE_AFSR_SE_SHIFT);
1710Sstevel@tonic-gate 	stdphysio(ecc_p->ecc_ue.ecc_afsr_pa, l);
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate 	l = (COMMON_ECC_CE_AFSR_E_MASK << COMMON_ECC_CE_AFSR_PE_SHIFT) |
1740Sstevel@tonic-gate 		(COMMON_ECC_CE_AFSR_E_MASK << COMMON_ECC_CE_AFSR_SE_SHIFT);
1750Sstevel@tonic-gate 	stdphysio(ecc_p->ecc_ce.ecc_afsr_pa, l);
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	/*
1780Sstevel@tonic-gate 	 * Enable ECC error detections via the control register.
1790Sstevel@tonic-gate 	 */
1800Sstevel@tonic-gate 	DEBUG0(DBG_ATTACH, dip, "ecc_configure: enabling UE CE detection\n");
1810Sstevel@tonic-gate 	l = COMMON_ECC_CTRL_ECC_EN;
1820Sstevel@tonic-gate 	if (ecc_error_intr_enable)
1830Sstevel@tonic-gate 		l |= COMMON_ECC_CTRL_UE_INTEN | COMMON_ECC_CTRL_CE_INTEN;
1840Sstevel@tonic-gate 	stdphysio(ecc_p->ecc_csr_pa, l);
1850Sstevel@tonic-gate }
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate void
1880Sstevel@tonic-gate ecc_enable_intr(pci_t *pci_p)
1890Sstevel@tonic-gate {
1900Sstevel@tonic-gate 	cb_enable_nintr(pci_p, CBNINTR_UE);
1910Sstevel@tonic-gate 	cb_enable_nintr(pci_p, CBNINTR_CE);
1920Sstevel@tonic-gate }
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate void
1950Sstevel@tonic-gate ecc_disable_wait(ecc_t *ecc_p)
1960Sstevel@tonic-gate {
1970Sstevel@tonic-gate 	ecc_disable(ecc_p, IB_INTR_WAIT);
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate uint_t
2010Sstevel@tonic-gate ecc_disable_nowait(ecc_t *ecc_p)
2020Sstevel@tonic-gate {
2030Sstevel@tonic-gate 	ecc_disable(ecc_p, IB_INTR_NOWAIT);
2040Sstevel@tonic-gate 	return (BF_NONE);
2050Sstevel@tonic-gate }
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate static void
2080Sstevel@tonic-gate ecc_disable(ecc_t *ecc_p, int wait)
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate 	cb_t *cb_p = ecc_p->ecc_pci_cmn_p->pci_common_cb_p;
2110Sstevel@tonic-gate 	uint64_t csr_pa = ecc_p->ecc_csr_pa;
2120Sstevel@tonic-gate 	uint64_t csr = lddphysio(csr_pa);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	csr &= ~(COMMON_ECC_CTRL_UE_INTEN | COMMON_ECC_CTRL_CE_INTEN);
2150Sstevel@tonic-gate 	stdphysio(csr_pa, csr);
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 	cb_disable_nintr(cb_p, CBNINTR_UE, wait);
2180Sstevel@tonic-gate 	cb_disable_nintr(cb_p, CBNINTR_CE, wait);
2190Sstevel@tonic-gate }
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate  * I/O ECC error handling:
2230Sstevel@tonic-gate  *
2240Sstevel@tonic-gate  * Below are the generic functions that handle PCI(pcisch, pcipsy) detected
2250Sstevel@tonic-gate  * ECC errors.
2260Sstevel@tonic-gate  *
 * The registered interrupt handler for both pcisch and pcipsy is ecc_intr();
 * its function is to receive the error, capture some state, and pass that on
 * to ecc_err_handler() for reporting purposes.
2300Sstevel@tonic-gate  *
2310Sstevel@tonic-gate  * ecc_err_handler() gathers more state(via ecc_errstate_get) and attempts
2320Sstevel@tonic-gate  * to handle and report the error. ecc_err_handler() must determine if we need
2330Sstevel@tonic-gate  * to panic due to this error (via pci_ecc_classify, which also decodes the
2340Sstevel@tonic-gate  * ECC afsr), and if any side effects exist that may have caused or are due
2350Sstevel@tonic-gate  * to this error. PBM errors related to the ECC error may exist, to report
2360Sstevel@tonic-gate  * them we call pci_pbm_err_handler() and call ndi_fm_handler_dispatch() so
2370Sstevel@tonic-gate  * that the child devices can log their pci errors.
2380Sstevel@tonic-gate  *
 * To report the error we must also get the syndrome and unum, which cannot
 * be done in high-level interrupt context. Therefore we have an error
 * queue (pci_ecc_queue) to which we dispatch errors so that they can be
 * reported later (by ecc_err_drain()).
2430Sstevel@tonic-gate  *
 * ecc_err_drain() will be called when either the softint is triggered
 * or the system is panicking. Either way it will gather more information
 * about the error from the CPU (via ecc_cpu_call(), ecc.c), attempt to
 * retire the faulty page (if the error is a UE), and report the detected
 * error.
2480Sstevel@tonic-gate  *
 * ecc_delayed_ce() is called via a timeout that ecc_err_drain() arms while
 * logging a CE received by interrupt.  It runs ecc_ce_delay_secs (6 by
 * default) seconds later and checks whether any new CEs are present; if so
 * they are sent to ecc_err_handler() for logging and another timeout is
 * armed when that error is drained.  If no CEs are present it re-enables
 * CEs by clearing the previous interrupt.  This is to keep the system going
 * in the event of a CE storm.
2550Sstevel@tonic-gate  */
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate /*
2580Sstevel@tonic-gate  * Function used to get ECC AFSR register
2590Sstevel@tonic-gate  */
2600Sstevel@tonic-gate static uint64_t
2610Sstevel@tonic-gate ecc_read_afsr(ecc_intr_info_t *ecc_ii_p)
2620Sstevel@tonic-gate {
2630Sstevel@tonic-gate 	uint_t i;
2640Sstevel@tonic-gate 	uint64_t afsr = 0ull;
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate 	ASSERT((ecc_ii_p->ecc_type == CBNINTR_UE) ||
2670Sstevel@tonic-gate 	    (ecc_ii_p->ecc_type == CBNINTR_CE));
2680Sstevel@tonic-gate 	if (!ecc_ii_p->ecc_errpndg_mask)
2690Sstevel@tonic-gate 		return (lddphysio(ecc_ii_p->ecc_afsr_pa));
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate 	for (i = 0; i < pci_ecc_afsr_retries; i++) {
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 		/*
2740Sstevel@tonic-gate 		 * If we timeout, the logging routine will
2750Sstevel@tonic-gate 		 * know because it will see the ERRPNDG bits
2760Sstevel@tonic-gate 		 * set in the AFSR.
2770Sstevel@tonic-gate 		 */
2780Sstevel@tonic-gate 		afsr = lddphysio(ecc_ii_p->ecc_afsr_pa);
2790Sstevel@tonic-gate 		if ((afsr & ecc_ii_p->ecc_errpndg_mask) == 0)
2800Sstevel@tonic-gate 			break;
2810Sstevel@tonic-gate 	}
2820Sstevel@tonic-gate 	return (afsr);
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate /*
2860Sstevel@tonic-gate  * IO detected ECC error interrupt handler, calls ecc_err_handler to post
2870Sstevel@tonic-gate  * error reports and handle the interrupt. Re-entry into ecc_err_handler
2880Sstevel@tonic-gate  * is protected by the per-chip mutex pci_fm_mutex.
2890Sstevel@tonic-gate  */
2900Sstevel@tonic-gate uint_t
2910Sstevel@tonic-gate ecc_intr(caddr_t a)
2920Sstevel@tonic-gate {
2930Sstevel@tonic-gate 	ecc_intr_info_t *ecc_ii_p = (ecc_intr_info_t *)a;
2940Sstevel@tonic-gate 	ecc_t *ecc_p = ecc_ii_p->ecc_p;
2950Sstevel@tonic-gate 	pci_common_t *cmn_p = ecc_p->ecc_pci_cmn_p;
2960Sstevel@tonic-gate 	ecc_errstate_t ecc_err;
2970Sstevel@tonic-gate 	int ret = DDI_FM_OK;
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	bzero(&ecc_err, sizeof (ecc_errstate_t));
3000Sstevel@tonic-gate 	ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1);
3010Sstevel@tonic-gate 	ecc_err.ecc_ii_p = *ecc_ii_p;
3020Sstevel@tonic-gate 	ecc_err.ecc_p = ecc_p;
3030Sstevel@tonic-gate 	ecc_err.ecc_caller = PCI_ECC_CALL;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	mutex_enter(&cmn_p->pci_fm_mutex);
3060Sstevel@tonic-gate 	ret = ecc_err_handler(&ecc_err);
3070Sstevel@tonic-gate 	mutex_exit(&cmn_p->pci_fm_mutex);
3080Sstevel@tonic-gate 	if (ret == DDI_FM_FATAL) {
3090Sstevel@tonic-gate 		/*
3100Sstevel@tonic-gate 		 * Need delay here to allow CPUs to handle related traps,
3110Sstevel@tonic-gate 		 * such as FRUs for USIIIi systems.
3120Sstevel@tonic-gate 		 */
3130Sstevel@tonic-gate 		DELAY(pci_ecc_panic_delay);
3140Sstevel@tonic-gate 		fm_panic("Fatal PCI UE Error");
3150Sstevel@tonic-gate 	}
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
3180Sstevel@tonic-gate }
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate /*
3210Sstevel@tonic-gate  * Function used to gather IO ECC error state.
3220Sstevel@tonic-gate  */
3230Sstevel@tonic-gate static void
3240Sstevel@tonic-gate ecc_errstate_get(ecc_errstate_t *ecc_err_p)
3250Sstevel@tonic-gate {
3260Sstevel@tonic-gate 	ecc_t *ecc_p;
3270Sstevel@tonic-gate 	uint_t bus_id;
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	ASSERT(ecc_err_p);
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	ecc_p = ecc_err_p->ecc_ii_p.ecc_p;
3320Sstevel@tonic-gate 	bus_id = ecc_p->ecc_pci_cmn_p->pci_common_id;
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
3350Sstevel@tonic-gate 	/*
3360Sstevel@tonic-gate 	 * Read the fault registers.
3370Sstevel@tonic-gate 	 */
3380Sstevel@tonic-gate 	ecc_err_p->ecc_afsr = ecc_read_afsr(&ecc_err_p->ecc_ii_p);
3390Sstevel@tonic-gate 	ecc_err_p->ecc_afar = lddphysio(ecc_err_p->ecc_ii_p.ecc_afar_pa);
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate 	ecc_err_p->ecc_offset = ((ecc_err_p->ecc_afsr &
3420Sstevel@tonic-gate 				ecc_err_p->ecc_ii_p.ecc_offset_mask) >>
3430Sstevel@tonic-gate 			ecc_err_p->ecc_ii_p.ecc_offset_shift) <<
3440Sstevel@tonic-gate 		ecc_err_p->ecc_ii_p.ecc_size_log2;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_id = gethrtime();
3470Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_stat = ecc_err_p->ecc_afsr;
3480Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_addr = P2ALIGN(ecc_err_p->ecc_afar, 64) +
3490Sstevel@tonic-gate 		ecc_err_p->ecc_offset;
3500Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_bus_id = bus_id;
3510Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_inst = CPU->cpu_id;
3520Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_status = ECC_IOBUS;
3530Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_in_memory = (pf_is_memory
3540Sstevel@tonic-gate 			(ecc_err_p->ecc_afar >> MMU_PAGESHIFT))? 1: 0;
3550Sstevel@tonic-gate 	ecc_err_p->ecc_aflt.flt_class = BUS_FAULT;
3560Sstevel@tonic-gate }
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate /*
3590Sstevel@tonic-gate  * ecc_pci_check: Called by ecc_err_handler() this function is responsible
3600Sstevel@tonic-gate  * for calling pci_pbm_err_handler() for both sides of the schizo/psycho
3610Sstevel@tonic-gate  * and calling their children error handlers(via ndi_fm_handler_dispatch()).
3620Sstevel@tonic-gate  */
3630Sstevel@tonic-gate static int
3640Sstevel@tonic-gate ecc_pci_check(ecc_t *ecc_p, uint64_t fme_ena)
3650Sstevel@tonic-gate {
3660Sstevel@tonic-gate 	ddi_fm_error_t derr;
3670Sstevel@tonic-gate 	int i;
3680Sstevel@tonic-gate 	int ret;
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 	bzero(&derr, sizeof (ddi_fm_error_t));
3730Sstevel@tonic-gate 	derr.fme_version = DDI_FME_VERSION;
3740Sstevel@tonic-gate 	derr.fme_ena = fme_ena;
3750Sstevel@tonic-gate 	ret = DDI_FM_NONFATAL;
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate 	/*
3780Sstevel@tonic-gate 	 * Need to report any PBM errors which may have caused or
3790Sstevel@tonic-gate 	 * resulted from this error.
3800Sstevel@tonic-gate 	 *
3810Sstevel@tonic-gate 	 * Each psycho or schizo is represented by a pair of pci nodes
3820Sstevel@tonic-gate 	 * in the device tree.
3830Sstevel@tonic-gate 	 */
3840Sstevel@tonic-gate 	for (i = 0; i < 2; i++) {
3850Sstevel@tonic-gate 		dev_info_t *dip;
3860Sstevel@tonic-gate 		pci_t *pci_p;
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 		/* Make sure PBM PCI node exists */
3890Sstevel@tonic-gate 		pci_p = ecc_p->ecc_pci_cmn_p->pci_p[i];
3900Sstevel@tonic-gate 		if (pci_p == NULL)
3910Sstevel@tonic-gate 			continue;
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate 		dip = pci_p->pci_dip;
3940Sstevel@tonic-gate 		if (pci_pbm_err_handler(dip, &derr, (void *)pci_p,
3950Sstevel@tonic-gate 		    PCI_ECC_CALL) == DDI_FM_FATAL)
3960Sstevel@tonic-gate 			ret = DDI_FM_FATAL;
3970Sstevel@tonic-gate 	}
3980Sstevel@tonic-gate 	if (ret == DDI_FM_FATAL)
3990Sstevel@tonic-gate 		return (DDI_FM_FATAL);
4000Sstevel@tonic-gate 	else
4010Sstevel@tonic-gate 		return (DDI_FM_NONFATAL);
4020Sstevel@tonic-gate }
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate /*
4050Sstevel@tonic-gate  * Function used to handle and log IO detected ECC errors, can be called by
4060Sstevel@tonic-gate  * ecc_intr and pci_err_callback(trap callback). Protected by pci_fm_mutex.
4070Sstevel@tonic-gate  */
4080Sstevel@tonic-gate int
4090Sstevel@tonic-gate ecc_err_handler(ecc_errstate_t *ecc_err_p)
4100Sstevel@tonic-gate {
4110Sstevel@tonic-gate 	uint64_t pri_err, sec_err;
4120Sstevel@tonic-gate 	ecc_intr_info_t *ecc_ii_p = &ecc_err_p->ecc_ii_p;
4130Sstevel@tonic-gate 	ecc_t *ecc_p = ecc_ii_p->ecc_p;
4140Sstevel@tonic-gate 	pci_t *pci_p;
4150Sstevel@tonic-gate 	cb_t *cb_p;
4160Sstevel@tonic-gate 	int fatal = 0;
4170Sstevel@tonic-gate 	int nonfatal = 0;
418*4039Skd93003 	ecc_errstate_t ecc_sec_err;
419*4039Skd93003 	uint64_t sec_tmp;
420*4039Skd93003 	int i;
421*4039Skd93003 	uint64_t afsr_err[] = { COMMON_ECC_AFSR_E_PIO,
422*4039Skd93003 				COMMON_ECC_AFSR_E_DRD,
423*4039Skd93003 				COMMON_ECC_AFSR_E_DWR };
424*4039Skd93003 
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	pci_p = ecc_p->ecc_pci_cmn_p->pci_p[0];
4290Sstevel@tonic-gate 	if (pci_p == NULL)
4300Sstevel@tonic-gate 		pci_p = ecc_p->ecc_pci_cmn_p->pci_p[1];
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate 	cb_p = ecc_p->ecc_pci_cmn_p->pci_common_cb_p;
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	ecc_errstate_get(ecc_err_p);
4350Sstevel@tonic-gate 	pri_err = (ecc_err_p->ecc_afsr >> COMMON_ECC_UE_AFSR_PE_SHIFT) &
4360Sstevel@tonic-gate 		COMMON_ECC_UE_AFSR_E_MASK;
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 	sec_err = (ecc_err_p->ecc_afsr >> COMMON_ECC_UE_AFSR_SE_SHIFT) &
4390Sstevel@tonic-gate 		COMMON_ECC_UE_AFSR_E_MASK;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	switch (ecc_ii_p->ecc_type) {
4420Sstevel@tonic-gate 	case CBNINTR_UE:
4430Sstevel@tonic-gate 		if (pri_err) {
4440Sstevel@tonic-gate 			ecc_err_p->ecc_aflt.flt_synd =
4450Sstevel@tonic-gate 				pci_ecc_get_synd(ecc_err_p->ecc_afsr);
4460Sstevel@tonic-gate 			ecc_err_p->ecc_pri = 1;
4470Sstevel@tonic-gate 			pci_ecc_classify(pri_err, ecc_err_p);
4480Sstevel@tonic-gate 			errorq_dispatch(pci_ecc_queue, (void *)ecc_err_p,
4490Sstevel@tonic-gate 				sizeof (ecc_errstate_t),
4500Sstevel@tonic-gate 				ecc_err_p->ecc_aflt.flt_panic);
4510Sstevel@tonic-gate 		}
4520Sstevel@tonic-gate 		if (sec_err) {
4530Sstevel@tonic-gate 			ecc_sec_err = *ecc_err_p;
4540Sstevel@tonic-gate 			ecc_sec_err.ecc_pri = 0;
455815Sdilpreet 			/*
456*4039Skd93003 			 * Secondary errors are cumulative so we need to loop
457815Sdilpreet 			 * through to capture them all.
458815Sdilpreet 			 */
459815Sdilpreet 			for (i = 0; i < 3; i++) {
460815Sdilpreet 				sec_tmp = sec_err & afsr_err[i];
461815Sdilpreet 				if (sec_tmp) {
462815Sdilpreet 					pci_ecc_classify(sec_tmp, &ecc_sec_err);
463815Sdilpreet 					ecc_ereport_post(pci_p->pci_dip,
464815Sdilpreet 					    &ecc_sec_err);
465815Sdilpreet 				}
466815Sdilpreet 			}
4670Sstevel@tonic-gate 		}
4680Sstevel@tonic-gate 		/*
4690Sstevel@tonic-gate 		 * Check for PCI bus errors that may have resulted from or
4700Sstevel@tonic-gate 		 * caused this UE.
4710Sstevel@tonic-gate 		 */
4720Sstevel@tonic-gate 		if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
4730Sstevel@tonic-gate 		    ecc_pci_check(ecc_p, ecc_err_p->ecc_ena) == DDI_FM_FATAL)
4740Sstevel@tonic-gate 			ecc_err_p->ecc_aflt.flt_panic = 1;
4750Sstevel@tonic-gate 
4760Sstevel@tonic-gate 		if (ecc_err_p->ecc_aflt.flt_panic &&
4770Sstevel@tonic-gate 				ecc_err_p->ecc_aflt.flt_in_memory)
4780Sstevel@tonic-gate 			panic_aflt = ecc_err_p->ecc_aflt;
4790Sstevel@tonic-gate 
4800Sstevel@tonic-gate 		if (ecc_err_p->ecc_aflt.flt_panic) {
4810Sstevel@tonic-gate 			/*
4820Sstevel@tonic-gate 			 * Disable all further errors since this will be
4830Sstevel@tonic-gate 			 * treated as a fatal error.
4840Sstevel@tonic-gate 			 */
4850Sstevel@tonic-gate 			(void) ecc_disable_nowait(ecc_p);
4860Sstevel@tonic-gate 			fatal++;
4870Sstevel@tonic-gate 		}
4880Sstevel@tonic-gate 		break;
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 	case CBNINTR_CE:
4910Sstevel@tonic-gate 		if (pri_err) {
4920Sstevel@tonic-gate 			ecc_err_p->ecc_pri = 1;
4930Sstevel@tonic-gate 			pci_ecc_classify(pri_err, ecc_err_p);
4940Sstevel@tonic-gate 			ecc_err_p->ecc_aflt.flt_synd =
4950Sstevel@tonic-gate 				pci_ecc_get_synd(ecc_err_p->ecc_afsr);
4960Sstevel@tonic-gate 			ce_scrub(&ecc_err_p->ecc_aflt);
4970Sstevel@tonic-gate 			errorq_dispatch(pci_ecc_queue, (void *)ecc_err_p,
4980Sstevel@tonic-gate 					sizeof (ecc_errstate_t), ERRORQ_ASYNC);
4990Sstevel@tonic-gate 			nonfatal++;
5000Sstevel@tonic-gate 		}
5010Sstevel@tonic-gate 		if (sec_err) {
5020Sstevel@tonic-gate 			ecc_sec_err = *ecc_err_p;
5030Sstevel@tonic-gate 			ecc_sec_err.ecc_pri = 0;
504*4039Skd93003 			/*
505*4039Skd93003 			 * Secondary errors are cumulative so we need to loop
506*4039Skd93003 			 * through to capture them all.
507*4039Skd93003 			 */
508*4039Skd93003 			for (i = 0; i < 3; i++) {
509*4039Skd93003 				sec_tmp = sec_err & afsr_err[i];
510*4039Skd93003 				if (sec_tmp) {
511*4039Skd93003 					pci_ecc_classify(sec_tmp, &ecc_sec_err);
512*4039Skd93003 					ecc_ereport_post(pci_p->pci_dip,
513*4039Skd93003 					    &ecc_sec_err);
514*4039Skd93003 				}
515*4039Skd93003 			}
5160Sstevel@tonic-gate 			nonfatal++;
5170Sstevel@tonic-gate 		}
5180Sstevel@tonic-gate 		break;
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate 	default:
5210Sstevel@tonic-gate 		return (DDI_FM_OK);
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 	/* Clear the errors */
5240Sstevel@tonic-gate 	stdphysio(ecc_ii_p->ecc_afsr_pa, ecc_err_p->ecc_afsr);
5250Sstevel@tonic-gate 	/*
5260Sstevel@tonic-gate 	 * Clear the interrupt if called by ecc_intr and UE error or if called
5270Sstevel@tonic-gate 	 * by ecc_intr and CE error and delayed CE interrupt handling is
5280Sstevel@tonic-gate 	 * turned off.
5290Sstevel@tonic-gate 	 */
5300Sstevel@tonic-gate 	if ((ecc_err_p->ecc_caller == PCI_ECC_CALL &&
5310Sstevel@tonic-gate 	    ecc_ii_p->ecc_type == CBNINTR_UE && !fatal) ||
5320Sstevel@tonic-gate 	    (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
5330Sstevel@tonic-gate 	    ecc_ii_p->ecc_type == CBNINTR_CE && !ecc_ce_delayed))
5340Sstevel@tonic-gate 		cb_clear_nintr(cb_p, ecc_ii_p->ecc_type);
5350Sstevel@tonic-gate 	if (!fatal && !nonfatal)
5360Sstevel@tonic-gate 		return (DDI_FM_OK);
5370Sstevel@tonic-gate 	else if (fatal)
5380Sstevel@tonic-gate 		return (DDI_FM_FATAL);
5390Sstevel@tonic-gate 	return (DDI_FM_NONFATAL);
5400Sstevel@tonic-gate }
5410Sstevel@tonic-gate 
5420Sstevel@tonic-gate /*
5430Sstevel@tonic-gate  * Called from ecc_err_drain below for CBINTR_CE case.
5440Sstevel@tonic-gate  */
5450Sstevel@tonic-gate static int
546917Selowe ecc_err_cexdiag(ecc_errstate_t *ecc_err, errorq_elem_t *eqep)
5470Sstevel@tonic-gate {
5480Sstevel@tonic-gate 	struct async_flt *ecc = &ecc_err->ecc_aflt;
549917Selowe 	uint64_t errors;
5500Sstevel@tonic-gate 
551917Selowe 	if (page_retire_check(ecc->flt_addr, &errors) == EINVAL) {
5520Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(ecc->flt_disp, CE_XDIAG_SKIP_NOPP);
5530Sstevel@tonic-gate 		return (0);
554917Selowe 	} else if (errors != PR_OK) {
5550Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(ecc->flt_disp, CE_XDIAG_SKIP_PAGEDET);
5560Sstevel@tonic-gate 		return (0);
557917Selowe 	} else {
558917Selowe 		return (ce_scrub_xdiag_recirc(ecc, pci_ecc_queue, eqep,
559917Selowe 		    offsetof(ecc_errstate_t, ecc_aflt)));
5600Sstevel@tonic-gate 	}
5610Sstevel@tonic-gate }
5620Sstevel@tonic-gate 
5630Sstevel@tonic-gate /*
5640Sstevel@tonic-gate  * Function used to drain pci_ecc_queue, either during panic or after softint
5650Sstevel@tonic-gate  * is generated, to log IO detected ECC errors.
5660Sstevel@tonic-gate  */
5670Sstevel@tonic-gate /*ARGSUSED*/
5680Sstevel@tonic-gate void
5690Sstevel@tonic-gate ecc_err_drain(void *not_used, ecc_errstate_t *ecc_err, errorq_elem_t *eqep)
5700Sstevel@tonic-gate {
5710Sstevel@tonic-gate 	struct async_flt *ecc = &ecc_err->ecc_aflt;
5720Sstevel@tonic-gate 	pci_t *pci_p = ecc_err->ecc_p->ecc_pci_cmn_p->pci_p[0];
5730Sstevel@tonic-gate 	int ecc_type = ecc_err->ecc_ii_p.ecc_type;
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 	if (pci_p == NULL)
5760Sstevel@tonic-gate 		pci_p = ecc_err->ecc_p->ecc_pci_cmn_p->pci_p[1];
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 	if (ecc->flt_class == RECIRC_BUS_FAULT) {
5790Sstevel@tonic-gate 		/*
5800Sstevel@tonic-gate 		 * Perform any additional actions that occur after the
5810Sstevel@tonic-gate 		 * ecc_err_cexdiag below and post the ereport.
5820Sstevel@tonic-gate 		 */
5830Sstevel@tonic-gate 		ecc->flt_class = BUS_FAULT;
5840Sstevel@tonic-gate 		ecc_err->ecc_err_type = flt_to_error_type(ecc);
5850Sstevel@tonic-gate 		ecc_ereport_post(pci_p->pci_dip, ecc_err);
5860Sstevel@tonic-gate 		return;
5870Sstevel@tonic-gate 	}
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 	ecc_cpu_call(ecc, ecc_err->ecc_unum, (ecc_type == CBNINTR_UE) ?
5900Sstevel@tonic-gate 			ECC_IO_UE : ECC_IO_CE);
5910Sstevel@tonic-gate 
5920Sstevel@tonic-gate 	switch (ecc_type) {
5930Sstevel@tonic-gate 	case CBNINTR_UE:
594917Selowe 		if (ecc_err->ecc_pg_ret == 1) {
595917Selowe 			(void) page_retire(ecc->flt_addr, PR_UE);
5960Sstevel@tonic-gate 		}
59749Sgavinm 		ecc_err->ecc_err_type = flt_to_error_type(ecc);
5980Sstevel@tonic-gate 		break;
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate 	case CBNINTR_CE:
6010Sstevel@tonic-gate 		/*
6020Sstevel@tonic-gate 		 * Setup timeout (if CE detected via interrupt) to
6030Sstevel@tonic-gate 		 * re-enable CE interrupts if no more CEs are detected.
6040Sstevel@tonic-gate 		 * This is to protect against CE storms.
6050Sstevel@tonic-gate 		 */
6060Sstevel@tonic-gate 		if (ecc_ce_delayed &&
6070Sstevel@tonic-gate 		    ecc_err->ecc_caller == PCI_ECC_CALL &&
6080Sstevel@tonic-gate 		    ecc_err->ecc_p->ecc_to_id == 0) {
6090Sstevel@tonic-gate 			ecc_err->ecc_p->ecc_to_id = timeout(ecc_delayed_ce,
6100Sstevel@tonic-gate 			    (void *)ecc_err->ecc_p,
6110Sstevel@tonic-gate 			    drv_usectohz((clock_t)ecc_ce_delay_secs *
6120Sstevel@tonic-gate 			    MICROSEC));
6130Sstevel@tonic-gate 		}
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 		/* ecc_err_cexdiag returns nonzero to recirculate */
6160Sstevel@tonic-gate 		if (CE_XDIAG_EXT_ALG_APPLIED(ecc->flt_disp) &&
617917Selowe 		    ecc_err_cexdiag(ecc_err, eqep))
6180Sstevel@tonic-gate 			return;
6190Sstevel@tonic-gate 		ecc_err->ecc_err_type = flt_to_error_type(ecc);
6200Sstevel@tonic-gate 		break;
6210Sstevel@tonic-gate 	}
6220Sstevel@tonic-gate 
6230Sstevel@tonic-gate 	ecc_ereport_post(pci_p->pci_dip, ecc_err);
6240Sstevel@tonic-gate }
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate static void
6270Sstevel@tonic-gate ecc_delayed_ce(void *arg)
6280Sstevel@tonic-gate {
6290Sstevel@tonic-gate 	ecc_t *ecc_p = (ecc_t *)arg;
6300Sstevel@tonic-gate 	pci_common_t *cmn_p;
6310Sstevel@tonic-gate 	cb_t *cb_p;
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate 	ASSERT(ecc_p);
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 	cmn_p = ecc_p->ecc_pci_cmn_p;
6360Sstevel@tonic-gate 	cb_p = cmn_p->pci_common_cb_p;
6370Sstevel@tonic-gate 	/*
6380Sstevel@tonic-gate 	 * If no more CE errors are found then enable interrupts(by
6390Sstevel@tonic-gate 	 * clearing the previous interrupt), else send in for logging
6400Sstevel@tonic-gate 	 * and the timeout should be set again.
6410Sstevel@tonic-gate 	 */
6420Sstevel@tonic-gate 	ecc_p->ecc_to_id = 0;
6430Sstevel@tonic-gate 	if (!((ecc_read_afsr(&ecc_p->ecc_ce) >>
6440Sstevel@tonic-gate 	    COMMON_ECC_UE_AFSR_PE_SHIFT) & COMMON_ECC_UE_AFSR_E_MASK)) {
6450Sstevel@tonic-gate 		cb_clear_nintr(cb_p, ecc_p->ecc_ce.ecc_type);
6460Sstevel@tonic-gate 	} else {
6470Sstevel@tonic-gate 		ecc_errstate_t ecc_err;
6480Sstevel@tonic-gate 
6490Sstevel@tonic-gate 		bzero(&ecc_err, sizeof (ecc_errstate_t));
6500Sstevel@tonic-gate 		ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1);
6510Sstevel@tonic-gate 		ecc_err.ecc_ii_p = ecc_p->ecc_ce;
6520Sstevel@tonic-gate 		ecc_err.ecc_p = ecc_p;
6530Sstevel@tonic-gate 		ecc_err.ecc_caller = PCI_ECC_CALL;
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate 		mutex_enter(&cmn_p->pci_fm_mutex);
6560Sstevel@tonic-gate 		(void) ecc_err_handler(&ecc_err);
6570Sstevel@tonic-gate 		mutex_exit(&cmn_p->pci_fm_mutex);
6580Sstevel@tonic-gate 	}
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate 
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate  * Function used to post IO detected ECC ereports.
6630Sstevel@tonic-gate  */
6640Sstevel@tonic-gate static void
6650Sstevel@tonic-gate ecc_ereport_post(dev_info_t *dip, ecc_errstate_t *ecc_err)
6660Sstevel@tonic-gate {
6670Sstevel@tonic-gate 	char buf[FM_MAX_CLASS], dev_path[MAXPATHLEN], *ptr;
6680Sstevel@tonic-gate 	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
6690Sstevel@tonic-gate 	nvlist_t *ereport, *detector;
6700Sstevel@tonic-gate 	nv_alloc_t *nva;
6710Sstevel@tonic-gate 	errorq_elem_t *eqep;
6720Sstevel@tonic-gate 
6730Sstevel@tonic-gate 	/*
6740Sstevel@tonic-gate 	 * We do not use ddi_fm_ereport_post because we need to set a
6750Sstevel@tonic-gate 	 * special detector here. Since we do not have a device path for
6760Sstevel@tonic-gate 	 * the bridge chip we use what we think it should be to aid in
6770Sstevel@tonic-gate 	 * diagnosis. This path fmri is created by pci_fmri_create()
6780Sstevel@tonic-gate 	 * during initialization.
6790Sstevel@tonic-gate 	 */
6800Sstevel@tonic-gate 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", DDI_IO_CLASS,
6810Sstevel@tonic-gate 	    ecc_err->ecc_bridge_type, ecc_err->ecc_aflt.flt_erpt_class);
6820Sstevel@tonic-gate 
6830Sstevel@tonic-gate 	ecc_err->ecc_ena = ecc_err->ecc_ena ? ecc_err->ecc_ena :
6840Sstevel@tonic-gate 		fm_ena_generate(0, FM_ENA_FMT1);
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	eqep = errorq_reserve(fmhdl->fh_errorq);
6870Sstevel@tonic-gate 	if (eqep == NULL)
6880Sstevel@tonic-gate 		return;
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 	ereport = errorq_elem_nvl(fmhdl->fh_errorq, eqep);
6910Sstevel@tonic-gate 	nva = errorq_elem_nva(fmhdl->fh_errorq, eqep);
6920Sstevel@tonic-gate 	detector = fm_nvlist_create(nva);
6930Sstevel@tonic-gate 
6940Sstevel@tonic-gate 	ASSERT(ereport);
6950Sstevel@tonic-gate 	ASSERT(nva);
6960Sstevel@tonic-gate 	ASSERT(detector);
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	ddi_pathname(dip, dev_path);
6990Sstevel@tonic-gate 	ptr = strrchr(dev_path, (int)',');
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	if (ptr)
7020Sstevel@tonic-gate 		*ptr = '\0';
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate 	fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, dev_path, NULL);
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 	if (ecc_err->ecc_pri) {
7071186Sayznaga 		if ((ecc_err->ecc_fmri = fm_nvlist_create(nva)) != NULL) {
7081186Sayznaga 			char sid[DIMM_SERIAL_ID_LEN] = "";
7091186Sayznaga 			uint64_t offset = (uint64_t)-1;
7101186Sayznaga 			int len;
7111186Sayznaga 			int ret;
7121186Sayznaga 
7131186Sayznaga 			ret = cpu_get_mem_sid(ecc_err->ecc_unum, sid,
7141186Sayznaga 			    DIMM_SERIAL_ID_LEN, &len);
7151186Sayznaga 
7161186Sayznaga 			if (ret == 0) {
7171186Sayznaga 				(void) cpu_get_mem_offset(
7181186Sayznaga 				    ecc_err->ecc_aflt.flt_addr, &offset);
7191186Sayznaga 			}
7201186Sayznaga 
7210Sstevel@tonic-gate 			fm_fmri_mem_set(ecc_err->ecc_fmri,
7220Sstevel@tonic-gate 			    FM_MEM_SCHEME_VERSION, NULL, ecc_err->ecc_unum,
7231186Sayznaga 			    (ret == 0) ? sid : NULL, offset);
7241186Sayznaga 		}
7250Sstevel@tonic-gate 		fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
7260Sstevel@tonic-gate 		    ecc_err->ecc_ena, detector,
7270Sstevel@tonic-gate 		    PCI_ECC_AFSR, DATA_TYPE_UINT64, ecc_err->ecc_afsr,
7280Sstevel@tonic-gate 		    PCI_ECC_AFAR, DATA_TYPE_UINT64, ecc_err->ecc_aflt.flt_addr,
7290Sstevel@tonic-gate 		    PCI_ECC_CTRL, DATA_TYPE_UINT64, ecc_err->ecc_ctrl,
7300Sstevel@tonic-gate 		    PCI_ECC_SYND, DATA_TYPE_UINT16, ecc_err->ecc_aflt.flt_synd,
7310Sstevel@tonic-gate 		    PCI_ECC_TYPE, DATA_TYPE_STRING, ecc_err->ecc_err_type,
7320Sstevel@tonic-gate 		    PCI_ECC_DISP, DATA_TYPE_UINT64, ecc_err->ecc_aflt.flt_disp,
7330Sstevel@tonic-gate 		    PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, ecc_err->ecc_fmri,
7340Sstevel@tonic-gate 		    NULL);
7350Sstevel@tonic-gate 	} else {
7360Sstevel@tonic-gate 		fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
7370Sstevel@tonic-gate 		    ecc_err->ecc_ena, detector,
7380Sstevel@tonic-gate 		    PCI_ECC_AFSR, DATA_TYPE_UINT64, ecc_err->ecc_afsr,
7390Sstevel@tonic-gate 		    PCI_ECC_CTRL, DATA_TYPE_UINT64, ecc_err->ecc_ctrl,
7400Sstevel@tonic-gate 		    NULL);
7410Sstevel@tonic-gate 	}
7420Sstevel@tonic-gate 	errorq_commit(fmhdl->fh_errorq, eqep, ERRORQ_ASYNC);
7430Sstevel@tonic-gate }
744