11772Sjl139090 /*
21772Sjl139090 * CDDL HEADER START
31772Sjl139090 *
41772Sjl139090 * The contents of this file are subject to the terms of the
51772Sjl139090 * Common Development and Distribution License (the "License").
61772Sjl139090 * You may not use this file except in compliance with the License.
71772Sjl139090 *
81772Sjl139090 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91772Sjl139090 * or http://www.opensolaris.org/os/licensing.
101772Sjl139090 * See the License for the specific language governing permissions
111772Sjl139090 * and limitations under the License.
121772Sjl139090 *
131772Sjl139090 * When distributing Covered Code, include this CDDL HEADER in each
141772Sjl139090 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151772Sjl139090 * If applicable, add the following below this CDDL HEADER, with the
161772Sjl139090 * fields enclosed by brackets "[]" replaced with your own identifying
171772Sjl139090 * information: Portions Copyright [yyyy] [name of copyright owner]
181772Sjl139090 *
191772Sjl139090 * CDDL HEADER END
201772Sjl139090 */
211772Sjl139090 /*
221772Sjl139090 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
231772Sjl139090 * Use is subject to license terms.
241772Sjl139090 */
251772Sjl139090
261772Sjl139090 #pragma ident "%Z%%M% %I% %E% SMI"
271772Sjl139090
281772Sjl139090 /*
291772Sjl139090 * CMU-CH ECC support
301772Sjl139090 */
311772Sjl139090
321772Sjl139090 #include <sys/types.h>
331772Sjl139090 #include <sys/systm.h>
341772Sjl139090 #include <sys/kmem.h>
351772Sjl139090 #include <sys/sunddi.h>
361772Sjl139090 #include <sys/intr.h>
371772Sjl139090 #include <sys/async.h>
381772Sjl139090 #include <sys/ddi_impldefs.h>
391772Sjl139090 #include <sys/machsystm.h>
401772Sjl139090 #include <sys/sysmacros.h>
411772Sjl139090 #include <sys/fm/protocol.h>
421772Sjl139090 #include <sys/fm/util.h>
431772Sjl139090 #include <sys/fm/io/pci.h>
441772Sjl139090 #include <sys/fm/io/sun4upci.h>
451772Sjl139090 #include <sys/fm/io/ddi.h>
461772Sjl139090 #include <sys/pcicmu/pcicmu.h>
471772Sjl139090
481772Sjl139090 static void pcmu_ecc_disable(pcmu_ecc_t *, int);
491772Sjl139090 static uint64_t pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t *);
501772Sjl139090 static void pcmu_ecc_ereport_post(dev_info_t *dip,
511772Sjl139090 pcmu_ecc_errstate_t *ecc_err);
521772Sjl139090
531772Sjl139090 clock_t pcmu_pecc_panic_delay = 200;
541772Sjl139090
551772Sjl139090 void
pcmu_ecc_create(pcmu_t * pcmu_p)561772Sjl139090 pcmu_ecc_create(pcmu_t *pcmu_p)
571772Sjl139090 {
581772Sjl139090 uint64_t pcb_base_pa = pcmu_p->pcmu_cb_p->pcb_base_pa;
591772Sjl139090 pcmu_ecc_t *pecc_p;
601772Sjl139090 /* LINTED variable */
611772Sjl139090 dev_info_t *dip = pcmu_p->pcmu_dip;
621772Sjl139090
631772Sjl139090 pecc_p = (pcmu_ecc_t *)kmem_zalloc(sizeof (pcmu_ecc_t), KM_SLEEP);
641772Sjl139090 pecc_p->pecc_pcmu_p = pcmu_p;
651772Sjl139090 pcmu_p->pcmu_pecc_p = pecc_p;
661772Sjl139090
671772Sjl139090 pecc_p->pecc_ue.pecc_p = pecc_p;
681772Sjl139090 pecc_p->pecc_ue.pecc_type = CBNINTR_UE;
691772Sjl139090
701772Sjl139090 pcmu_ecc_setup(pecc_p);
711772Sjl139090
721772Sjl139090 /*
731772Sjl139090 * Determine the virtual addresses of the streaming cache
741772Sjl139090 * control/status and flush registers.
751772Sjl139090 */
761772Sjl139090 pecc_p->pecc_csr_pa = pcb_base_pa + PCMU_ECC_CSR_OFFSET;
771772Sjl139090 pecc_p->pecc_ue.pecc_afsr_pa = pcb_base_pa + PCMU_UE_AFSR_OFFSET;
781772Sjl139090 pecc_p->pecc_ue.pecc_afar_pa = pcb_base_pa + PCMU_UE_AFAR_OFFSET;
791772Sjl139090
801772Sjl139090 PCMU_DBG1(PCMU_DBG_ATTACH, dip, "pcmu_ecc_create: csr=%x\n",
811772Sjl139090 pecc_p->pecc_csr_pa);
821772Sjl139090 PCMU_DBG2(PCMU_DBG_ATTACH, dip,
831772Sjl139090 "pcmu_ecc_create: ue_afsr=%x, ue_afar=%x\n",
841772Sjl139090 pecc_p->pecc_ue.pecc_afsr_pa, pecc_p->pecc_ue.pecc_afar_pa);
851772Sjl139090
861772Sjl139090 pcmu_ecc_configure(pcmu_p);
871772Sjl139090
881772Sjl139090 /*
891772Sjl139090 * Register routines to be called from system error handling code.
901772Sjl139090 */
911772Sjl139090 bus_func_register(BF_TYPE_ERRDIS,
921772Sjl139090 (busfunc_t)pcmu_ecc_disable_nowait, pecc_p);
931772Sjl139090 }
941772Sjl139090
951772Sjl139090 int
pcmu_ecc_register_intr(pcmu_t * pcmu_p)961772Sjl139090 pcmu_ecc_register_intr(pcmu_t *pcmu_p)
971772Sjl139090 {
981772Sjl139090 pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
991772Sjl139090 int ret;
1001772Sjl139090
1011772Sjl139090 /*
1021772Sjl139090 * Install the UE error interrupt handlers.
1031772Sjl139090 */
1041772Sjl139090 ret = pcmu_ecc_add_intr(pcmu_p, CBNINTR_UE, &pecc_p->pecc_ue);
1051772Sjl139090 return (ret);
1061772Sjl139090 }
1071772Sjl139090
1081772Sjl139090 void
pcmu_ecc_destroy(pcmu_t * pcmu_p)1091772Sjl139090 pcmu_ecc_destroy(pcmu_t *pcmu_p)
1101772Sjl139090 {
1111772Sjl139090 pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
1121772Sjl139090
1131772Sjl139090 PCMU_DBG0(PCMU_DBG_DETACH, pcmu_p->pcmu_dip, "pcmu_ecc_destroy:\n");
1141772Sjl139090
1151772Sjl139090 /*
1161772Sjl139090 * Disable UE ECC error interrupts.
1171772Sjl139090 */
1181772Sjl139090 pcmu_ecc_disable_wait(pecc_p);
1191772Sjl139090
1201772Sjl139090 /*
1211772Sjl139090 * Remove the ECC interrupt handlers.
1221772Sjl139090 */
1231772Sjl139090 pcmu_ecc_rem_intr(pcmu_p, CBNINTR_UE, &pecc_p->pecc_ue);
1241772Sjl139090
1251772Sjl139090 /*
1261772Sjl139090 * Unregister our error handling functions.
1271772Sjl139090 */
1281772Sjl139090 bus_func_unregister(BF_TYPE_ERRDIS,
1291772Sjl139090 (busfunc_t)pcmu_ecc_disable_nowait, pecc_p);
1301772Sjl139090 /*
1311772Sjl139090 * If a timer has been set, unset it.
1321772Sjl139090 */
1331772Sjl139090 (void) untimeout(pecc_p->pecc_tout_id);
1341772Sjl139090 kmem_free(pecc_p, sizeof (pcmu_ecc_t));
1351772Sjl139090 pcmu_p->pcmu_pecc_p = NULL;
1361772Sjl139090 }
1371772Sjl139090
1381772Sjl139090 void
pcmu_ecc_configure(pcmu_t * pcmu_p)1391772Sjl139090 pcmu_ecc_configure(pcmu_t *pcmu_p)
1401772Sjl139090 {
1411772Sjl139090 pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
1421772Sjl139090 uint64_t l;
1431772Sjl139090 /* LINTED variable */
1441772Sjl139090 dev_info_t *dip = pcmu_p->pcmu_dip;
1451772Sjl139090
1461772Sjl139090 /*
1471772Sjl139090 * Clear any pending ECC errors.
1481772Sjl139090 */
1491772Sjl139090 PCMU_DBG0(PCMU_DBG_ATTACH, dip,
1501772Sjl139090 "pcmu_ecc_configure: clearing UE errors\n");
1511772Sjl139090 l = (PCMU_ECC_UE_AFSR_E_MASK << PCMU_ECC_UE_AFSR_PE_SHIFT) |
1521772Sjl139090 (PCMU_ECC_UE_AFSR_E_MASK << PCMU_ECC_UE_AFSR_SE_SHIFT);
1531772Sjl139090 stdphysio(pecc_p->pecc_ue.pecc_afsr_pa, l);
1541772Sjl139090
1551772Sjl139090 /*
1561772Sjl139090 * Enable ECC error detections via the control register.
1571772Sjl139090 */
1581772Sjl139090 PCMU_DBG0(PCMU_DBG_ATTACH, dip,
1591772Sjl139090 "pcmu_ecc_configure: enabling UE detection\n");
1601772Sjl139090 l = PCMU_ECC_CTRL_ECC_EN;
1611772Sjl139090 if (ecc_error_intr_enable)
1621772Sjl139090 l |= PCMU_ECC_CTRL_UE_INTEN;
1631772Sjl139090 stdphysio(pecc_p->pecc_csr_pa, l);
1641772Sjl139090 }
1651772Sjl139090
1661772Sjl139090 void
pcmu_ecc_enable_intr(pcmu_t * pcmu_p)1671772Sjl139090 pcmu_ecc_enable_intr(pcmu_t *pcmu_p)
1681772Sjl139090 {
1691772Sjl139090 pcmu_cb_enable_nintr(pcmu_p, CBNINTR_UE);
1701772Sjl139090 }
1711772Sjl139090
1721772Sjl139090 void
pcmu_ecc_disable_wait(pcmu_ecc_t * pecc_p)1731772Sjl139090 pcmu_ecc_disable_wait(pcmu_ecc_t *pecc_p)
1741772Sjl139090 {
1751772Sjl139090 pcmu_ecc_disable(pecc_p, PCMU_IB_INTR_WAIT);
1761772Sjl139090 }
1771772Sjl139090
1781772Sjl139090 uint_t
pcmu_ecc_disable_nowait(pcmu_ecc_t * pecc_p)1791772Sjl139090 pcmu_ecc_disable_nowait(pcmu_ecc_t *pecc_p)
1801772Sjl139090 {
1811772Sjl139090 pcmu_ecc_disable(pecc_p, PCMU_IB_INTR_NOWAIT);
1821772Sjl139090 return (BF_NONE);
1831772Sjl139090 }
1841772Sjl139090
1851772Sjl139090 static void
pcmu_ecc_disable(pcmu_ecc_t * pecc_p,int wait)1861772Sjl139090 pcmu_ecc_disable(pcmu_ecc_t *pecc_p, int wait)
1871772Sjl139090 {
1881772Sjl139090 pcmu_cb_t *pcb_p = pecc_p->pecc_pcmu_p->pcmu_cb_p;
1891772Sjl139090 uint64_t csr_pa = pecc_p->pecc_csr_pa;
1901772Sjl139090 uint64_t csr = lddphysio(csr_pa);
1911772Sjl139090
1921772Sjl139090 csr &= ~(PCMU_ECC_CTRL_UE_INTEN);
1931772Sjl139090 stdphysio(csr_pa, csr);
1941772Sjl139090 pcmu_cb_disable_nintr(pcb_p, CBNINTR_UE, wait);
1951772Sjl139090 }
1961772Sjl139090
/*
 * I/O ECC error handling:
 *
 * Below are the generic functions that handle detected ECC errors.
 *
 * The registered interrupt handler is pcmu_ecc_intr(); its function
 * is to receive the error, capture some state, and pass that on to
 * pcmu_ecc_err_handler() for reporting purposes.
 *
 * pcmu_ecc_err_handler() gathers more state (via pcmu_ecc_errstate_get())
 * and attempts to handle and report the error. pcmu_ecc_err_handler()
 * must determine if we need to panic due to this error (via
 * pcmu_ecc_classify(), which also decodes the ECC AFSR), and if any
 * side effects exist that may have caused or are due to this error.
 * PBM errors related to the ECC error may exist; to report
 * them we call pcmu_pbm_err_handler().
 *
 * To report the error we must also get the syndrome and unum, which cannot
 * be done in high-level interrupt context. Therefore we have an error
 * queue (pcmu_ecc_queue) to which we dispatch errors so that they can be
 * reported later by pcmu_ecc_err_drain().
 *
 * pcmu_ecc_err_drain() will be called when either the softint is triggered
 * or the system is panicking. Either way it will gather more information
 * about the error from the CPU (via ecc_cpu_call(), ecc.c), attempt to
 * retire the faulty page (if the error is a UE), and report the detected
 * error.
 */
2251772Sjl139090
2261772Sjl139090 /*
2271772Sjl139090 * Function used to get ECC AFSR register
2281772Sjl139090 */
2291772Sjl139090 static uint64_t
pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t * ecc_ii_p)2301772Sjl139090 pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t *ecc_ii_p)
2311772Sjl139090 {
2321772Sjl139090 ASSERT(ecc_ii_p->pecc_type == CBNINTR_UE);
2331772Sjl139090 return (lddphysio(ecc_ii_p->pecc_afsr_pa));
2341772Sjl139090 }
2351772Sjl139090
2361772Sjl139090 /*
2371772Sjl139090 * IO detected ECC error interrupt handler, calls pcmu_ecc_err_handler to post
2381772Sjl139090 * error reports and handle the interrupt. Re-entry into pcmu_ecc_err_handler
2391772Sjl139090 * is protected by the per-chip mutex pcmu_err_mutex.
2401772Sjl139090 */
2411772Sjl139090 uint_t
pcmu_ecc_intr(caddr_t a)2421772Sjl139090 pcmu_ecc_intr(caddr_t a)
2431772Sjl139090 {
2441772Sjl139090 pcmu_ecc_intr_info_t *ecc_ii_p = (pcmu_ecc_intr_info_t *)a;
2451772Sjl139090 pcmu_ecc_t *pecc_p = ecc_ii_p->pecc_p;
2461772Sjl139090 pcmu_t *pcmu_p = pecc_p->pecc_pcmu_p;
2471772Sjl139090 pcmu_ecc_errstate_t ecc_err;
2481772Sjl139090 int ret = DDI_FM_OK;
2491772Sjl139090
2501772Sjl139090 bzero(&ecc_err, sizeof (pcmu_ecc_errstate_t));
2511772Sjl139090 ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1); /* RAGS */
2521772Sjl139090 ecc_err.ecc_ii_p = *ecc_ii_p;
2531772Sjl139090 ecc_err.pecc_p = pecc_p;
2541772Sjl139090 ecc_err.ecc_caller = PCI_ECC_CALL;
2551772Sjl139090
2561772Sjl139090 mutex_enter(&pcmu_p->pcmu_err_mutex);
2571772Sjl139090 ret = pcmu_ecc_err_handler(&ecc_err);
2581772Sjl139090 mutex_exit(&pcmu_p->pcmu_err_mutex);
2591772Sjl139090 if (ret == DDI_FM_FATAL) {
2601772Sjl139090 /*
2611772Sjl139090 * Need delay here to allow CPUs to handle related traps,
2621772Sjl139090 * such as FRUs for USIIIi systems.
2631772Sjl139090 */
2641772Sjl139090 DELAY(pcmu_pecc_panic_delay);
2651772Sjl139090 cmn_err(CE_PANIC, "Fatal PCI UE Error");
2661772Sjl139090 }
2671772Sjl139090
2681772Sjl139090 return (DDI_INTR_CLAIMED);
2691772Sjl139090 }
2701772Sjl139090
2711772Sjl139090 /*
2721772Sjl139090 * Function used to gather IO ECC error state.
2731772Sjl139090 */
2741772Sjl139090 static void
pcmu_ecc_errstate_get(pcmu_ecc_errstate_t * ecc_err_p)2751772Sjl139090 pcmu_ecc_errstate_get(pcmu_ecc_errstate_t *ecc_err_p)
2761772Sjl139090 {
2771772Sjl139090 pcmu_ecc_t *pecc_p;
2781772Sjl139090 uint_t bus_id;
2791772Sjl139090
2801772Sjl139090 ASSERT(ecc_err_p);
2811772Sjl139090
2821772Sjl139090 pecc_p = ecc_err_p->ecc_ii_p.pecc_p;
2831772Sjl139090 bus_id = pecc_p->pecc_pcmu_p->pcmu_id;
2841772Sjl139090
2851772Sjl139090 ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
2861772Sjl139090 /*
2871772Sjl139090 * Read the fault registers.
2881772Sjl139090 */
2891772Sjl139090 ecc_err_p->ecc_afsr = pcmu_ecc_read_afsr(&ecc_err_p->ecc_ii_p);
2901772Sjl139090 ecc_err_p->ecc_afar = lddphysio(ecc_err_p->ecc_ii_p.pecc_afar_pa);
2911772Sjl139090
2921772Sjl139090 ecc_err_p->ecc_offset = ((ecc_err_p->ecc_afsr &
2931772Sjl139090 ecc_err_p->ecc_ii_p.pecc_offset_mask) >>
2941772Sjl139090 ecc_err_p->ecc_ii_p.pecc_offset_shift) <<
2951772Sjl139090 ecc_err_p->ecc_ii_p.pecc_size_log2;
2961772Sjl139090
2971772Sjl139090 ecc_err_p->ecc_aflt.flt_id = gethrtime();
2981772Sjl139090 ecc_err_p->ecc_aflt.flt_stat = ecc_err_p->ecc_afsr;
2991772Sjl139090 ecc_err_p->ecc_aflt.flt_addr = P2ALIGN(ecc_err_p->ecc_afar, 64) +
3001772Sjl139090 ecc_err_p->ecc_offset;
3011772Sjl139090 ecc_err_p->ecc_aflt.flt_bus_id = bus_id;
3021772Sjl139090 ecc_err_p->ecc_aflt.flt_inst = 0;
3031772Sjl139090 ecc_err_p->ecc_aflt.flt_status = ECC_IOBUS;
3041772Sjl139090 ecc_err_p->ecc_aflt.flt_in_memory = 0;
3051772Sjl139090 ecc_err_p->ecc_aflt.flt_class = BUS_FAULT;
3061772Sjl139090 }
3071772Sjl139090
3081772Sjl139090 /*
3091772Sjl139090 * pcmu_ecc_check: Called by pcmu_ecc_err_handler() this function is responsible
3101772Sjl139090 * for calling pcmu_pbm_err_handler() and calling their children error
3111772Sjl139090 * handlers(via ndi_fm_handler_dispatch()).
3121772Sjl139090 */
3131772Sjl139090 static int
pcmu_ecc_check(pcmu_ecc_t * pecc_p,uint64_t fme_ena)3141772Sjl139090 pcmu_ecc_check(pcmu_ecc_t *pecc_p, uint64_t fme_ena)
3151772Sjl139090 {
3161772Sjl139090 ddi_fm_error_t derr;
3171772Sjl139090 int ret;
3181772Sjl139090 pcmu_t *pcmu_p;
3191772Sjl139090
3201772Sjl139090
3211772Sjl139090 ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
3221772Sjl139090
3231772Sjl139090 bzero(&derr, sizeof (ddi_fm_error_t));
3241772Sjl139090 derr.fme_version = DDI_FME_VERSION;
3251772Sjl139090 derr.fme_ena = fme_ena;
3261772Sjl139090 ret = DDI_FM_NONFATAL;
3271772Sjl139090
3281772Sjl139090 /*
3291772Sjl139090 * Need to report any PBM errors which may have caused or
3301772Sjl139090 * resulted from this error.
3311772Sjl139090 */
3321772Sjl139090 pcmu_p = pecc_p->pecc_pcmu_p;
3331772Sjl139090 if (pcmu_pbm_err_handler(pcmu_p->pcmu_dip, &derr, (void *)pcmu_p,
3341772Sjl139090 PCI_ECC_CALL) == DDI_FM_FATAL)
3351772Sjl139090 ret = DDI_FM_FATAL;
3361772Sjl139090
3371772Sjl139090 if (ret == DDI_FM_FATAL)
3381772Sjl139090 return (DDI_FM_FATAL);
3391772Sjl139090 else
3401772Sjl139090 return (DDI_FM_NONFATAL);
3411772Sjl139090 }
3421772Sjl139090
3431772Sjl139090 /*
3441772Sjl139090 * Function used to handle and log IO detected ECC errors, can be called by
3451772Sjl139090 * pcmu_ecc_intr and pcmu_err_callback(trap callback). Protected by
3461772Sjl139090 * pcmu_err_mutex.
3471772Sjl139090 */
3481772Sjl139090 int
pcmu_ecc_err_handler(pcmu_ecc_errstate_t * ecc_err_p)3491772Sjl139090 pcmu_ecc_err_handler(pcmu_ecc_errstate_t *ecc_err_p)
3501772Sjl139090 {
3511772Sjl139090 /* LINTED variable */
3521772Sjl139090 uint64_t pri_err, sec_err;
3531772Sjl139090 pcmu_ecc_intr_info_t *ecc_ii_p = &ecc_err_p->ecc_ii_p;
3541772Sjl139090 pcmu_ecc_t *pecc_p = ecc_ii_p->pecc_p;
3551772Sjl139090 /* LINTED variable */
3561772Sjl139090 pcmu_t *pcmu_p;
3571772Sjl139090 pcmu_cb_t *pcb_p;
3581772Sjl139090 int fatal = 0;
3591772Sjl139090 int nonfatal = 0;
3601772Sjl139090
3611772Sjl139090 ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
3621772Sjl139090
3631772Sjl139090 pcmu_p = pecc_p->pecc_pcmu_p;
3641772Sjl139090 pcb_p = pecc_p->pecc_pcmu_p->pcmu_cb_p;
3651772Sjl139090
3661772Sjl139090 pcmu_ecc_errstate_get(ecc_err_p);
3671772Sjl139090 pri_err = (ecc_err_p->ecc_afsr >> PCMU_ECC_UE_AFSR_PE_SHIFT) &
3681772Sjl139090 PCMU_ECC_UE_AFSR_E_MASK;
3691772Sjl139090
3701772Sjl139090 sec_err = (ecc_err_p->ecc_afsr >> PCMU_ECC_UE_AFSR_SE_SHIFT) &
3711772Sjl139090 PCMU_ECC_UE_AFSR_E_MASK;
3721772Sjl139090
3731772Sjl139090 switch (ecc_ii_p->pecc_type) {
3741772Sjl139090 case CBNINTR_UE:
3751772Sjl139090 if (pri_err) {
3761772Sjl139090 ecc_err_p->ecc_aflt.flt_synd = 0;
3771772Sjl139090 ecc_err_p->pecc_pri = 1;
3781772Sjl139090 pcmu_ecc_classify(pri_err, ecc_err_p);
3791772Sjl139090 errorq_dispatch(pcmu_ecc_queue, (void *)ecc_err_p,
3801772Sjl139090 sizeof (pcmu_ecc_errstate_t),
3811772Sjl139090 ecc_err_p->ecc_aflt.flt_panic);
3821772Sjl139090 }
3831772Sjl139090 if (sec_err) {
3841772Sjl139090 pcmu_ecc_errstate_t ecc_sec_err;
3851772Sjl139090
3861772Sjl139090 ecc_sec_err = *ecc_err_p;
3871772Sjl139090 ecc_sec_err.pecc_pri = 0;
3881772Sjl139090 pcmu_ecc_classify(sec_err, &ecc_sec_err);
3891772Sjl139090 pcmu_ecc_ereport_post(pcmu_p->pcmu_dip,
3901772Sjl139090 &ecc_sec_err);
3911772Sjl139090 }
3921772Sjl139090 /*
3931772Sjl139090 * Check for PCI bus errors that may have resulted from or
3941772Sjl139090 * caused this UE.
3951772Sjl139090 */
3961772Sjl139090 if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
3971772Sjl139090 pcmu_ecc_check(pecc_p, ecc_err_p->ecc_ena) == DDI_FM_FATAL)
3981772Sjl139090 ecc_err_p->ecc_aflt.flt_panic = 1;
3991772Sjl139090
4001772Sjl139090 if (ecc_err_p->ecc_aflt.flt_panic) {
4011772Sjl139090 /*
4021772Sjl139090 * Disable all further errors since this will be
4031772Sjl139090 * treated as a fatal error.
4041772Sjl139090 */
4051772Sjl139090 (void) pcmu_ecc_disable_nowait(pecc_p);
4061772Sjl139090 fatal++;
4071772Sjl139090 }
4081772Sjl139090 break;
4091772Sjl139090
4101772Sjl139090 default:
4111772Sjl139090 return (DDI_FM_OK);
4121772Sjl139090 }
4131772Sjl139090 /* Clear the errors */
4141772Sjl139090 stdphysio(ecc_ii_p->pecc_afsr_pa, ecc_err_p->ecc_afsr);
4151772Sjl139090 /*
4161772Sjl139090 * Clear the interrupt if called by pcmu_ecc_intr and UE error
4171772Sjl139090 * or if called by pcmu_ecc_intr and CE error and delayed CE
4181772Sjl139090 * interrupt handling is turned off.
4191772Sjl139090 */
4201772Sjl139090 if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
4211772Sjl139090 ecc_ii_p->pecc_type == CBNINTR_UE && !fatal)
4221772Sjl139090 pcmu_cb_clear_nintr(pcb_p, ecc_ii_p->pecc_type);
4231772Sjl139090 if (!fatal && !nonfatal)
4241772Sjl139090 return (DDI_FM_OK);
4251772Sjl139090 else if (fatal)
4261772Sjl139090 return (DDI_FM_FATAL);
4271772Sjl139090 return (DDI_FM_NONFATAL);
4281772Sjl139090 }
4291772Sjl139090
4301772Sjl139090 /*
4311772Sjl139090 * Function used to drain pcmu_ecc_queue, either during panic or after softint
4321772Sjl139090 * is generated, to log IO detected ECC errors.
4331772Sjl139090 */
434*2619Sjimand /* ARGSUSED */
4351772Sjl139090 void
pcmu_ecc_err_drain(void * not_used,pcmu_ecc_errstate_t * ecc_err)4361772Sjl139090 pcmu_ecc_err_drain(void *not_used, pcmu_ecc_errstate_t *ecc_err)
4371772Sjl139090 {
4381772Sjl139090 struct async_flt *ecc = &ecc_err->ecc_aflt;
4391772Sjl139090 pcmu_t *pcmu_p = ecc_err->pecc_p->pecc_pcmu_p;
4401772Sjl139090
4411772Sjl139090 ecc_cpu_call(ecc, ecc_err->ecc_unum, ECC_IO_UE);
4421772Sjl139090 ecc_err->ecc_err_type = "U";
4431772Sjl139090 pcmu_ecc_ereport_post(pcmu_p->pcmu_dip, ecc_err);
4441772Sjl139090 }
4451772Sjl139090
4461772Sjl139090 /*
4471772Sjl139090 * Function used to post IO detected ECC ereports.
4481772Sjl139090 */
4491772Sjl139090 static void
pcmu_ecc_ereport_post(dev_info_t * dip,pcmu_ecc_errstate_t * ecc_err)4501772Sjl139090 pcmu_ecc_ereport_post(dev_info_t *dip, pcmu_ecc_errstate_t *ecc_err)
4511772Sjl139090 {
4521772Sjl139090 char *aux_msg;
4531772Sjl139090 pcmu_t *pcmu_p;
4541772Sjl139090 int instance = ddi_get_instance(dip);
4551772Sjl139090
4561772Sjl139090 pcmu_p = get_pcmu_soft_state(instance);
4571772Sjl139090 if (ecc_err->pecc_pri) {
4581772Sjl139090 aux_msg = "PIO primary uncorrectable error";
4591772Sjl139090 } else {
4601772Sjl139090 aux_msg = "PIO secondary uncorrectable error";
4611772Sjl139090 }
4621772Sjl139090 cmn_err(CE_WARN, "%s %s: %s %s=0x%lx, %s=0x%lx, %s=0x%x",
4631772Sjl139090 (pcmu_p->pcmu_pcbm_p)->pcbm_nameinst_str,
4641772Sjl139090 (pcmu_p->pcmu_pcbm_p)->pcbm_nameaddr_str,
4651772Sjl139090 aux_msg, PCI_ECC_AFSR, ecc_err->ecc_afsr,
4661772Sjl139090 PCI_ECC_AFAR, ecc_err->ecc_aflt.flt_addr,
4671772Sjl139090 "portid", ecc_err->ecc_aflt.flt_bus_id);
4681772Sjl139090 }
469