/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fpras_impl.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/plat_ecc_unum.h>
#include <sys/cyclic.h>
#include <sys/errorq.h>
#include <sys/errclassify.h>

#ifdef	CHEETAHPLUS_ERRATUM_25
#include <sys/xc_impl.h>
#endif	/* CHEETAHPLUS_ERRATUM_25 */

/*
 * Note that 'Cheetah PRM' refers to:
 *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
 */

/*
 * Per CPU pointers to physical address of TL>0 logout data areas.
 * These pointers have to be in the kernel nucleus to avoid MMU
 * misses.
 */
uint64_t ch_err_tl1_paddrs[NCPU];

/*
 * One statically allocated structure to use during startup/DR
 * to prevent unnecessary panics.
 */
ch_err_tl1_data_t ch_err_tl1_data;

/*
 * Per CPU pending error at TL>0, used by level15 softint handler
 */
uchar_t ch_err_tl1_pending[NCPU];

/*
 * For deferred CE re-enable after trap.
 */
taskq_t		*ch_check_ce_tq;

/*
 * Internal functions.
 */
static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
static int clear_ecc(struct async_flt *ecc);
#if defined(CPU_IMP_ECACHE_ASSOC)
static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
#endif
static int cpu_ecache_set_size(struct cpu *cp);
static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
static int cpu_ectag_pa_to_subblk_state(int cachesize,
				uint64_t subaddr, uint64_t tag);
static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
static void cpu_scrubphys(struct async_flt *aflt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
    int *, int *);
static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
static void cpu_ereport_init(struct async_flt *aflt);
static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop);
static int cpu_ce_delayed_ec_logout(uint64_t);
static int cpu_matching_ecache_line(uint64_t, void *, int, int *);

#ifdef	CHEETAHPLUS_ERRATUM_25
static int mondo_recover_proc(uint16_t, int);
static void cheetah_nudge_init(void);
static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
    cyc_time_t *when);
static void cheetah_nudge_buddy(void);
#endif	/* CHEETAHPLUS_ERRATUM_25 */

#if defined(CPU_IMP_L1_CACHE_PARITY)
static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *      00-127  The number of the bad bit, when only one bit is bad.
 *      128     ECC bit C0 is bad.
 *      129     ECC bit C1 is bad.
 *      130     ECC bit C2 is bad.
 *      131     ECC bit C3 is bad.
 *      132     ECC bit C4 is bad.
 *      133     ECC bit C5 is bad.
 *      134     ECC bit C6 is bad.
 *      135     ECC bit C7 is bad.
 *      136     ECC bit C8 is bad.
 *      137-143 reserved for Mtag Data and ECC.
 *      144(M2) Two bits are bad within a nibble.
 *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
 *      147(M)  Multiple bits (5 or more) are bad.
 *      148     NO bits are bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
 */

#define	C0	128
#define	C1	129
#define	C2	130
#define	C3	131
#define	C4	132
#define	C5	133
#define	C6	134
#define	C7	135
#define	C8	136
#define	MT0	137	/* Mtag Data bit 0 */
#define	MT1	138
#define	MT2	139
#define	MTC0	140	/* Mtag Check bit 0 */
#define	MTC1	141
#define	MTC2	142
#define	MTC3	143
#define	M2	144
#define	M3	145
#define	M4	146
#define	M	147
#define	NA	148
#if defined(JALAPENO) || defined(SERRANO)
#define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
#define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
#define	SLAST	S003MEM	/* last special syndrome */
#else /* JALAPENO || SERRANO */
#define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
#define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
#define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
#define	SLAST	S11C	/* last special syndrome */
#endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
#define	BPAR15	167
#endif	/* JALAPENO || SERRANO */

static uint8_t ecc_syndrome_tab[] =
{
NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
#if defined(JALAPENO) || defined(SERRANO)
116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#else	/* JALAPENO || SERRANO */
116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#endif	/* JALAPENO || SERRANO */
C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
#if defined(JALAPENO) || defined(SERRANO)
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
#else	/* JALAPENO || SERRANO */
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
#endif	/* JALAPENO || SERRANO */
M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
};

#define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
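
#if 0
/*
 * Example (excluded from compilation): decoding an E$ ECC syndrome with
 * the table above.  A 9-bit syndrome indexes ecc_syndrome_tab directly;
 * entries 0-127 name the single bad data bit, C0-C8 name a bad check
 * bit, M2/M3/M4/M flag multi-bit errors and NA means no bits are bad.
 * The helper name is illustrative only.
 */
static uint8_t
example_esynd_decode(ushort_t synd)
{
	if (synd >= ESYND_TBL_SIZE)
		return (M);	/* out-of-range syndrome: treat as multi-bit */
	return (ecc_syndrome_tab[synd]);
}
#endif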

#if !(defined(JALAPENO) || defined(SERRANO))
/*
 * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
 * of this array have the following semantics:
 *
 *      -1	Invalid mtag syndrome.
 *      137     Mtag Data 0 is bad.
 *      138     Mtag Data 1 is bad.
 *      139     Mtag Data 2 is bad.
 *      140     Mtag ECC 0 is bad.
 *      141     Mtag ECC 1 is bad.
 *      142     Mtag ECC 2 is bad.
 *      143     Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
 */
short mtag_syndrome_tab[] =
{
NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
};

#define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))

#else /* !(JALAPENO || SERRANO) */

#define	BSYND_TBL_SIZE	16

#endif /* !(JALAPENO || SERRANO) */

/*
 * CE initial classification and subsequent action lookup table
 */
static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
static int ce_disp_inited;

/*
 * Set to disable leaky and partner check for memory correctables
 */
int ce_xdiag_off;

/*
 * The following are not incremented atomically so are indicative only
 */
static int ce_xdiag_drops;
static int ce_xdiag_lkydrops;
static int ce_xdiag_ptnrdrops;
static int ce_xdiag_bad;

/*
 * CE leaky check callback structure
 */
typedef struct {
	struct async_flt *lkycb_aflt;
	errorq_t *lkycb_eqp;
	errorq_elem_t *lkycb_eqep;
} ce_lkychk_cb_t;

/*
 * defines for various ecache_flush_flag's
 */
#define	ECACHE_FLUSH_LINE	1
#define	ECACHE_FLUSH_ALL	2

/*
 * STICK sync
 */
#define	STICK_ITERATION 10
#define	MAX_TSKEW	1
#define	EV_A_START	0
#define	EV_A_END	1
#define	EV_B_START	2
#define	EV_B_END	3
#define	EVENTS		4

static int64_t stick_iter = STICK_ITERATION;
static int64_t stick_tsk = MAX_TSKEW;

typedef enum {
	EVENT_NULL = 0,
	SLAVE_START,
	SLAVE_CONT,
	MASTER_START
} event_cmd_t;

static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
static int64_t timestamp[EVENTS];
static volatile int slave_done;
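
/*
 * Roughly, the state above implements the STICK sync handshake used by
 * the master/slave sync routines: commands are posted through
 * stick_sync_cmd, both sides capture %stick values in timestamp[] at
 * the EV_A_START/EV_A_END and EV_B_START/EV_B_END points, and the
 * measured skew is compared against stick_tsk, retrying up to
 * stick_iter times.
 */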

#ifdef DEBUG
#define	DSYNC_ATTEMPTS 64
typedef struct {
	int64_t	skew_val[DSYNC_ATTEMPTS];
} ss_t;

ss_t stick_sync_stats[NCPU];
#endif /* DEBUG */

/*
 * Maximum number of contexts for Cheetah.
 */
#define	MAX_NCTXS	(1 << 13)

/* Will be set !NULL for Cheetah+ and derivatives. */
uchar_t *ctx_pgsz_array = NULL;
#if defined(CPU_IMP_DUAL_PAGESIZE)
static uchar_t ctx_pgsz_arr[MAX_NCTXS];
uint_t disable_dual_pgsz = 0;
#endif	/* CPU_IMP_DUAL_PAGESIZE */

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t cache_boot_state;

/*
 * PA[22:0] represent Displacement in Safari configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

bus_config_eclk_t bus_config_eclk[] = {
#if defined(JALAPENO) || defined(SERRANO)
	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
#else /* JALAPENO || SERRANO */
	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
#endif /* JALAPENO || SERRANO */
	{0, 0}
};

/*
 * Interval for deferred CEEN reenable
 */
int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * set to 0 in /etc/system to defer CEEN reenable for all CEs
 */
uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;

/*
 * Set of all offline cpus
 */
cpuset_t cpu_offline_set;

static void cpu_delayed_check_ce_errors(void *);
static void cpu_check_ce_errors(void *);
void cpu_error_ecache_flush(ch_async_flt_t *);
static int cpu_error_ecache_flush_required(ch_async_flt_t *);
static void cpu_log_and_clear_ce(ch_async_flt_t *);
void cpu_ce_detected(ch_cpu_errors_t *, int);

/*
 * CE leaky check timeout in microseconds.  This is chosen to be twice the
 * memory refresh interval of current DIMMs (64ms), i.e. 128ms.  After the
 * initial fix, that gives at least one full refresh cycle in which the cell
 * can leak (whereafter further refreshes simply reinforce any incorrect bit
 * value).
 */
clock_t cpu_ce_lkychk_timeout_usec = 128000;

/*
 * CE partner check partner caching period in seconds
 */
int cpu_ce_ptnr_cachetime_sec = 60;

/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 */
#define	CH_SET_TRAP(ttentry, ttlabel)			\
		bcopy((const void *)&ttlabel, &ttentry, 32);		\
		flush_instr_mem((caddr_t)&ttentry, 32);
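
/*
 * Example use (the labels here are illustrative; actual call sites live
 * in the per-chip cpu_init_trap() routines):
 *
 *	CH_SET_TRAP(tt0_fecc, fecc_err_instr);
 *
 * Eight instructions (32 bytes) are copied over the trap table slot and
 * then flushed from the I$ so the new handler is fetched on the next
 * trap.
 */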

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

void
cpu_setup(void)
{
	extern int at_flags;
	extern int disable_delay_tlb_flush, delay_tlb_flush;
	extern int cpc_has_overflow_intr;
	extern int disable_text_largepages;
	extern int use_text_pgsz4m;

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_dcu() & DCU_CACHE;

	/*
	 * Use the maximum number of contexts available for Cheetah
	 * unless it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It cannot be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	/*
	 * On Panther-based machines, this should
	 * also include AV_SPARC_POPC too
	 */
	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

	/*
	 * On cheetah, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
	kpm_size_shift = 43;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * Cheetah has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Use cheetah flush-all support
	 */
	if (!disable_delay_tlb_flush)
		delay_tlb_flush = 1;

#if defined(CPU_IMP_DUAL_PAGESIZE)
	/*
	 * Use Cheetah+ and later dual page size support.
	 */
	if (!disable_dual_pgsz) {
		ctx_pgsz_array = ctx_pgsz_arr;
	}
#endif	/* CPU_IMP_DUAL_PAGESIZE */

	/*
	 * Declare that this architecture/cpu combination does fpRAS.
	 */
	fpras_implemented = 1;

	/*
	 * Enable 4M pages to be used for mapping user text by default.  Don't
	 * use large pages for initialized data segments since we may not know
	 * at exec() time what should be the preferred large page size for DTLB
	 * programming.
	 */
	use_text_pgsz4m = 1;
	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
	    (1 << TTE32M) | (1 << TTE256M);
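	/*
	 * With 64K, 512K, 32M and 256M masked off above, user text is
	 * mapped only with the 8K base page size and the 4M size enabled
	 * via use_text_pgsz4m.
	 */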

	/*
	 * Setup CE lookup table
	 */
	CE_INITDISPTBL_POPULATE(ce_disp_table);
	ce_disp_inited = 1;
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For UltraSPARC III and beyond we want to use the
	 * system clock rate as the basis for low level timing,
	 * due to support of mixed speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef CHEETAHPLUS_ERRATUM_25
/*
 * Tunables
 */
int cheetah_bpe_off = 0;
int cheetah_sendmondo_recover = 1;
int cheetah_sendmondo_fullscan = 0;
int cheetah_sendmondo_recover_delay = 5;

#define	CHEETAH_LIVELOCK_MIN_DELAY	1

/*
 * Recovery Statistics
 */
typedef struct cheetah_livelock_entry	{
	int cpuid;		/* fallen cpu */
	int buddy;		/* cpu that ran recovery */
	clock_t lbolt;		/* when recovery started */
	hrtime_t recovery_time;	/* time spent in recovery */
} cheetah_livelock_entry_t;

#define	CHEETAH_LIVELOCK_NENTRY	32

cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
int cheetah_livelock_entry_nxt;

#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}
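
/*
 * cheetah_livelock_hist is a wrap-around log: ENTRY_NEXT hands back the
 * next slot and advances cheetah_livelock_entry_nxt modulo
 * CHEETAH_LIVELOCK_NENTRY, so only the most recent recoveries are kept.
 */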

#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val

struct {
	hrtime_t hrt;		/* maximum recovery time */
	int recovery;		/* recovered */
	int full_claimed;	/* maximum pages claimed in full recovery */
	int proc_entry;		/* attempted to claim TSB */
	int proc_tsb_scan;	/* tsb scanned */
	int proc_tsb_partscan;	/* tsb partially scanned */
	int proc_tsb_fullscan;	/* whole tsb scanned */
	int proc_claimed;	/* maximum pages claimed in tsb scan */
	int proc_user;		/* user thread */
	int proc_kernel;	/* kernel thread */
	int proc_onflt;		/* bad stack */
	int proc_cpu;		/* null cpu */
	int proc_thread;	/* null thread */
	int proc_proc;		/* null proc */
	int proc_as;		/* null as */
	int proc_hat;		/* null hat */
	int proc_hat_inval;	/* hat contents don't make sense */
	int proc_hat_busy;	/* hat is changing TSBs */
	int proc_tsb_reloc;	/* TSB skipped because being relocated */
	int proc_cnum_bad;	/* cnum out of range */
	int proc_cnum;		/* last cnum processed */
	tte_t proc_tte;		/* last tte processed */
} cheetah_livelock_stat;

#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = value

#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if (value > cheetah_livelock_stat.item)		\
		cheetah_livelock_stat.item = value;	\
}

/*
 * Attempt to recover a cpu by claiming every cache line as saved
 * in the TSB that the non-responsive cpu is using. Since we can't
 * grab any adaptive lock, this is at best an attempt to do so. Because
 * we don't grab any locks, we must operate under the protection of
 * on_fault().
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
	label_t ljb;
	cpu_t *cp;
	kthread_t *t;
	proc_t *p;
	struct as *as;
	struct hat *hat;
	short cnum;
	struct tsb_info *tsbinfop;
	struct tsbe *tsbep;
	caddr_t tsbp;
	caddr_t end_tsbp;
	uint64_t paddr;
	uint64_t idsr;
	u_longlong_t pahi, palo;
	int pages_claimed = 0;
	tte_t tsbe_tte;
	int tried_kernel_tsb = 0;

	CHEETAH_LIVELOCK_STAT(proc_entry);

	if (on_fault(&ljb)) {
		CHEETAH_LIVELOCK_STAT(proc_onflt);
		goto badstruct;
	}

	if ((cp = cpu[cpuid]) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_cpu);
		goto badstruct;
	}

	if ((t = cp->cpu_thread) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_thread);
		goto badstruct;
	}

	if ((p = ttoproc(t)) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_proc);
		goto badstruct;
	}

	if ((as = p->p_as) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_as);
		goto badstruct;
	}

	if ((hat = as->a_hat) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_hat);
		goto badstruct;
	}

	if (hat != ksfmmup) {
		CHEETAH_LIVELOCK_STAT(proc_user);
		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
			goto badstruct;
		}
		tsbinfop = hat->sfmmu_tsb;
		if (tsbinfop == NULL) {
			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
			goto badstruct;
		}
		tsbp = tsbinfop->tsb_va;
		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
	} else {
		CHEETAH_LIVELOCK_STAT(proc_kernel);
		tsbinfop = NULL;
		tsbp = ktsb_base;
		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
	}

	/* Verify as */
	if (hat->sfmmu_as != as) {
		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
		goto badstruct;
	}

	cnum = hat->sfmmu_cnum;
	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
		goto badstruct;
	}

	do {
		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

		/*
		 * Skip TSBs being relocated.  This is important because
		 * we want to avoid the following deadlock scenario:
		 *
		 * 1) when we came in we set ourselves to "in recover" state.
		 * 2) when we try to touch TSB being relocated the mapping
		 *    will be in the suspended state so we'll spin waiting
		 *    for it to be unlocked.
		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it, it will send an xtrap which will fail to
		 *    xcall us or the CPU we're trying to recover, and will
		 *    in turn enter the mondo code.
		 * 4) since we are still spinning on the locked mapping
		 *    no further progress will be made and the system will
		 *    inevitably hard hang.
		 *
		 * A TSB not being relocated can't begin being relocated
		 * while we're accessing it because we check
		 * sendmondo_in_recover before relocating TSBs.
		 */
		if (hat != ksfmmup &&
		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
			goto next_tsbinfo;
		}

		for (tsbep = (struct tsbe *)tsbp;
		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
			tsbe_tte = tsbep->tte_data;

			if (tsbe_tte.tte_val == 0) {
				/*
				 * Invalid tte
				 */
				continue;
			}
			if (tsbe_tte.tte_se) {
				/*
				 * Don't want device registers
				 */
				continue;
			}
			if (tsbe_tte.tte_cp == 0) {
				/*
				 * Must be cached in E$
				 */
				continue;
			}
			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
				goto done;
			}
			pahi = tsbe_tte.tte_pahi;
			palo = tsbe_tte.tte_palo;
			paddr = (uint64_t)((pahi << 32) |
			    (palo << MMU_PAGESHIFT));
			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
			    CH_ECACHE_SUBBLK_SIZE);
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
next_tsbinfo:
		if (tsbinfop != NULL)
			tsbinfop = tsbinfop->tsb_next;
		if (tsbinfop != NULL) {
			tsbp = tsbinfop->tsb_va;
			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
		} else if (tsbp == ktsb_base) {
			tried_kernel_tsb = 1;
		} else if (!tried_kernel_tsb) {
			tsbp = ktsb_base;
			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
			hat = ksfmmup;
			tsbinfop = NULL;
		}
	} while (tsbinfop != NULL ||
	    ((tsbp == ktsb_base) && !tried_kernel_tsb));

	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	no_fault();
	idsr = getidsr();
	if ((idsr & (IDSR_NACK_BIT(bn) |
	    IDSR_BUSY_BIT(bn))) == 0) {
		return (1);
	} else {
		return (0);
	}

done:
	no_fault();
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	return (1);

badstruct:
	no_fault();
	return (0);
}

/*
 * Attempt to claim ownership, temporarily, of every cache line that a
 * non-responsive cpu might be using.  This might kick that cpu out of
 * this state.
 *
 * The return value indicates to the caller if we have exhausted all recovery
 * techniques. If 1 is returned, it is useless to call this function again
 * even for a different target CPU.
 */
int
mondo_recover(uint16_t cpuid, int bn)
{
	struct memseg *seg;
	uint64_t begin_pa, end_pa, cur_pa;
	hrtime_t begin_hrt, end_hrt;
	int retval = 0;
	int pages_claimed = 0;
	cheetah_livelock_entry_t *histp;
	uint64_t idsr;

	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
		/*
		 * Wait while recovery takes place
		 */
		while (sendmondo_in_recover) {
			drv_usecwait(1);
		}
		/*
		 * Assume we didn't claim the whole memory. If
		 * the target of this caller is not recovered,
		 * it will come back.
		 */
		return (retval);
	}

	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);

	begin_hrt = gethrtime_waitfree();
	/*
	 * First try to claim the lines in the TSB the target
	 * may have been using.
	 */
	if (mondo_recover_proc(cpuid, bn) == 1) {
		/*
		 * Didn't claim the whole memory
		 */
		goto done;
	}

	/*
	 * We tried using the TSB. The target is still
	 * not recovered. Check if complete memory scan is
	 * enabled.
	 */
	if (cheetah_sendmondo_fullscan == 0) {
		/*
		 * Full memory scan is disabled.
		 */
		retval = 1;
		goto done;
	}

	/*
	 * Try claiming the whole memory.
	 */
	for (seg = memsegs; seg; seg = seg->next) {
		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
		for (cur_pa = begin_pa; cur_pa < end_pa;
		    cur_pa += MMU_PAGESIZE) {
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				/*
				 * Didn't claim all memory
				 */
				goto done;
			}
			claimlines(cur_pa, MMU_PAGESIZE,
			    CH_ECACHE_SUBBLK_SIZE);
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
	}

	/*
	 * We did all we could.
	 */
	retval = 1;

done:
	/*
	 * Update statistics
	 */
	end_hrt = gethrtime_waitfree();
	CHEETAH_LIVELOCK_STAT(recovery);
	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time,
	    (end_hrt - begin_hrt));

	while (cas32(&sendmondo_in_recover, 1, 0) != 1);

	return (retval);
}

/*
 * This is called by the cyclic framework when this CPU becomes online
 */
/*ARGSUSED*/
static void
cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
{
	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
	hdlr->cyh_level = CY_LOW_LEVEL;
	hdlr->cyh_arg = NULL;

	/*
	 * Stagger the start time
	 */
	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
	}
	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
}

/*
 * Create a low level cyclic to send an xtrap to the next cpu online.
 * However, there's no need to have this running on a uniprocessor system.
 */
static void
cheetah_nudge_init(void)
{
	cyc_omni_handler_t hdlr;

	if (max_ncpus == 1) {
		return;
	}

	hdlr.cyo_online = cheetah_nudge_onln;
	hdlr.cyo_offline = NULL;
	hdlr.cyo_arg = NULL;

	mutex_enter(&cpu_lock);
	(void) cyclic_add_omni(&hdlr);
	mutex_exit(&cpu_lock);
}

/*
 * Cyclic handler to wake up buddy
 */
void
cheetah_nudge_buddy(void)
{
	/*
	 * Disable kernel preemption to protect the cpu list
	 */
	kpreempt_disable();
	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
		    0, 0);
	}
	kpreempt_enable();
}

#endif	/* CHEETAHPLUS_ERRATUM_25 */

#ifdef SEND_MONDO_STATS
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;
#ifdef	CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
#endif

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
	 * will be used for dispatching the interrupt. For now, assume
	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
	 * issues with respect to BUSY/NACK pair usage.
	 */
	busymask = IDSR_BUSY_BIT(cpuid);
#else /* JALAPENO || SERRANO */
	busymask = IDSR_BUSY;
#endif /* JALAPENO || SERRANO */
11020Sstevel@tonic-gate 	for (;;) {
11030Sstevel@tonic-gate 		idsr = getidsr();
11040Sstevel@tonic-gate 		if (idsr == 0)
11050Sstevel@tonic-gate 			break;
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 		tick = gettick();
11080Sstevel@tonic-gate 		/*
11090Sstevel@tonic-gate 		 * If there is a big jump between the current tick
11100Sstevel@tonic-gate 		 * count and lasttick, we have probably hit a
11110Sstevel@tonic-gate 		 * breakpoint.  Adjust endtick accordingly to avoid panicking.
11120Sstevel@tonic-gate 		 */
11130Sstevel@tonic-gate 		if (tick > (lasttick + xc_tick_jump_limit))
11140Sstevel@tonic-gate 			endtick += (tick - lasttick);
11150Sstevel@tonic-gate 		lasttick = tick;
11160Sstevel@tonic-gate 		if (tick > endtick) {
11170Sstevel@tonic-gate 			if (panic_quiesce)
11180Sstevel@tonic-gate 				return;
11190Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
11200Sstevel@tonic-gate 			if (cheetah_sendmondo_recover && recovered == 0) {
11210Sstevel@tonic-gate 				if (mondo_recover(cpuid, 0)) {
11220Sstevel@tonic-gate 					/*
11230Sstevel@tonic-gate 					 * We claimed all of memory, or the
11240Sstevel@tonic-gate 					 * full scan is disabled.
11250Sstevel@tonic-gate 					 */
11260Sstevel@tonic-gate 					recovered++;
11270Sstevel@tonic-gate 				}
11280Sstevel@tonic-gate 				tick = gettick();
11290Sstevel@tonic-gate 				endtick = tick + xc_tick_limit;
11300Sstevel@tonic-gate 				lasttick = tick;
11310Sstevel@tonic-gate 				/*
11320Sstevel@tonic-gate 				 * Recheck idsr
11330Sstevel@tonic-gate 				 */
11340Sstevel@tonic-gate 				continue;
11350Sstevel@tonic-gate 			} else
11360Sstevel@tonic-gate #endif	/* CHEETAHPLUS_ERRATUM_25 */
11370Sstevel@tonic-gate 			{
11380Sstevel@tonic-gate 				cmn_err(CE_PANIC, "send mondo timeout "
11390Sstevel@tonic-gate 				    "(target 0x%x) [%d NACK %d BUSY]",
11400Sstevel@tonic-gate 				    cpuid, nack, busy);
11410Sstevel@tonic-gate 			}
11420Sstevel@tonic-gate 		}
11430Sstevel@tonic-gate 
11440Sstevel@tonic-gate 		if (idsr & busymask) {
11450Sstevel@tonic-gate 			busy++;
11460Sstevel@tonic-gate 			continue;
11470Sstevel@tonic-gate 		}
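		/*
		 * BUSY is clear but the IDSR is still nonzero, which means
		 * the dispatch was NACKed: wait a microsecond and resend.
		 */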
11480Sstevel@tonic-gate 		drv_usecwait(1);
11490Sstevel@tonic-gate 		shipit(cpuid, 0);
11500Sstevel@tonic-gate 		nack++;
11510Sstevel@tonic-gate 		busy = 0;
11520Sstevel@tonic-gate 	}
11530Sstevel@tonic-gate #ifdef SEND_MONDO_STATS
11540Sstevel@tonic-gate 	{
11550Sstevel@tonic-gate 		int n = gettick() - starttick;
11560Sstevel@tonic-gate 		if (n < 8192)
11570Sstevel@tonic-gate 			x_one_stimes[n >> 7]++;
11580Sstevel@tonic-gate 		else
11590Sstevel@tonic-gate 			x_one_ltimes[(n >> 13) & 0xf]++;
11600Sstevel@tonic-gate 	}
11610Sstevel@tonic-gate #endif
11620Sstevel@tonic-gate }
11630Sstevel@tonic-gate 
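/*
 * Nothing to do here; an empty syncfpu() is presumably retained because
 * common SPARC code expects every CPU module to provide one.
 */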
11640Sstevel@tonic-gate void
11650Sstevel@tonic-gate syncfpu(void)
11660Sstevel@tonic-gate {
11670Sstevel@tonic-gate }
11680Sstevel@tonic-gate 
11690Sstevel@tonic-gate /*
11700Sstevel@tonic-gate  * Return the size of the processor-specific async
11710Sstevel@tonic-gate  * error structure used.
11720Sstevel@tonic-gate  */
11730Sstevel@tonic-gate int
11740Sstevel@tonic-gate cpu_aflt_size(void)
11750Sstevel@tonic-gate {
11760Sstevel@tonic-gate 	return (sizeof (ch_async_flt_t));
11770Sstevel@tonic-gate }
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate /*
1180*960Srscott  * Tunable to disable the checking of other CPUs' logout areas during panic for
1181*960Srscott  * potential syndrome 71 generating errors.
1182*960Srscott  */
1183*960Srscott int enable_check_other_cpus_logout = 1;
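
/*
 * For illustration (assuming the standard /etc/system tunable mechanism),
 * the check could be disabled at boot with:
 *
 *	set enable_check_other_cpus_logout = 0
 */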
1184*960Srscott 
1185*960Srscott /*
1186*960Srscott  * Check the other CPUs' logout areas for potential syndrome 71
1187*960Srscott  * generating errors.
1188*960Srscott  */
1189*960Srscott static void
1190*960Srscott cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1191*960Srscott     ch_cpu_logout_t *clop)
1192*960Srscott {
1193*960Srscott 	struct async_flt *aflt;
1194*960Srscott 	ch_async_flt_t ch_flt;
1195*960Srscott 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1196*960Srscott 
1197*960Srscott 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1198*960Srscott 		return;
1199*960Srscott 	}
1200*960Srscott 
1201*960Srscott 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1202*960Srscott 
1203*960Srscott 	t_afar = clop->clo_data.chd_afar;
1204*960Srscott 	t_afsr = clop->clo_data.chd_afsr;
1205*960Srscott 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1206*960Srscott #if defined(SERRANO)
1207*960Srscott 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1208*960Srscott #endif	/* SERRANO */
1209*960Srscott 
1210*960Srscott 	/*
1211*960Srscott 	 * In order to simplify code, we maintain this afsr_errs
1212*960Srscott 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1213*960Srscott 	 * sticky bits.
1214*960Srscott 	 */
1215*960Srscott 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1216*960Srscott 	    (t_afsr & C_AFSR_ALL_ERRS);
1217*960Srscott 
1218*960Srscott 	/* Setup the async fault structure */
1219*960Srscott 	aflt = (struct async_flt *)&ch_flt;
1220*960Srscott 	aflt->flt_id = gethrtime_waitfree();
1221*960Srscott 	ch_flt.afsr_ext = t_afsr_ext;
1222*960Srscott 	ch_flt.afsr_errs = t_afsr_errs;
1223*960Srscott 	aflt->flt_stat = t_afsr;
1224*960Srscott 	aflt->flt_addr = t_afar;
1225*960Srscott 	aflt->flt_bus_id = cpuid;
1226*960Srscott 	aflt->flt_inst = cpuid;
1227*960Srscott 	aflt->flt_pc = tpc;
1228*960Srscott 	aflt->flt_prot = AFLT_PROT_NONE;
1229*960Srscott 	aflt->flt_class = CPU_FAULT;
1230*960Srscott 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1231*960Srscott 	aflt->flt_tl = tl;
1232*960Srscott 	aflt->flt_status = ecc_type;
1233*960Srscott 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1234*960Srscott 
1235*960Srscott 	/*
1236*960Srscott 	 * Queue events on the async event queue, one event per error bit.
1237*960Srscott 	 * If no events are queued, queue an event to complain.
1238*960Srscott 	 */
1239*960Srscott 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1240*960Srscott 		ch_flt.flt_type = CPU_INV_AFSR;
1241*960Srscott 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1242*960Srscott 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1243*960Srscott 		    aflt->flt_panic);
1244*960Srscott 	}
1245*960Srscott 
1246*960Srscott 	/*
1247*960Srscott 	 * Zero out + invalidate CPU logout.
1248*960Srscott 	 */
1249*960Srscott 	bzero(clop, sizeof (ch_cpu_logout_t));
1250*960Srscott 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1251*960Srscott }
1252*960Srscott 
1253*960Srscott /*
1254*960Srscott  * Check the logout areas of all other cpus for unlogged errors.
1255*960Srscott  */
1256*960Srscott static void
1257*960Srscott cpu_check_other_cpus_logout(void)
1258*960Srscott {
1259*960Srscott 	int i, j;
1260*960Srscott 	processorid_t myid;
1261*960Srscott 	struct cpu *cp;
1262*960Srscott 	ch_err_tl1_data_t *cl1p;
1263*960Srscott 
1264*960Srscott 	myid = CPU->cpu_id;
1265*960Srscott 	for (i = 0; i < NCPU; i++) {
1266*960Srscott 		cp = cpu[i];
1267*960Srscott 
1268*960Srscott 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1269*960Srscott 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1270*960Srscott 			continue;
1271*960Srscott 		}
1272*960Srscott 
1273*960Srscott 		/*
1274*960Srscott 		 * Check each of the tl>0 logout areas
1275*960Srscott 		 */
1276*960Srscott 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1277*960Srscott 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1278*960Srscott 			if (cl1p->ch_err_tl1_flags == 0)
1279*960Srscott 				continue;
1280*960Srscott 
1281*960Srscott 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1282*960Srscott 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1283*960Srscott 		}
1284*960Srscott 
1285*960Srscott 		/*
1286*960Srscott 		 * Check each of the remaining logout areas
1287*960Srscott 		 */
1288*960Srscott 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1289*960Srscott 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1290*960Srscott 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1291*960Srscott 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1292*960Srscott 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1293*960Srscott 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1294*960Srscott 	}
1295*960Srscott }
1296*960Srscott 
1297*960Srscott /*
12980Sstevel@tonic-gate  * The fast_ecc_err handler transfers control here for UCU, UCC events.
12990Sstevel@tonic-gate  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
13000Sstevel@tonic-gate  * flush the error that caused the UCU/UCC, then again here at the end to
13010Sstevel@tonic-gate  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
13020Sstevel@tonic-gate  * the probability of getting a TL>1 Fast ECC trap when we're fielding
13030Sstevel@tonic-gate  * another Fast ECC trap.
13040Sstevel@tonic-gate  *
13050Sstevel@tonic-gate  * Cheetah+ also handles TSCE; no additional processing is required.
13060Sstevel@tonic-gate  * Panther adds L3_UCU and L3_UCC, which are reported in AFSR_EXT.
13070Sstevel@tonic-gate  *
13080Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
13090Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
13100Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
13110Sstevel@tonic-gate  */
13120Sstevel@tonic-gate /*ARGSUSED*/
13130Sstevel@tonic-gate void
13140Sstevel@tonic-gate cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
13150Sstevel@tonic-gate {
13160Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
1317815Sdilpreet 	uint64_t ceen, nceen;
13180Sstevel@tonic-gate 
13190Sstevel@tonic-gate 	/*
13200Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
13210Sstevel@tonic-gate 	 * pointer, then we will have to make do without any detailed
13220Sstevel@tonic-gate 	 * logout information.
13230Sstevel@tonic-gate 	 */
13240Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
13250Sstevel@tonic-gate 		clop = NULL;
13260Sstevel@tonic-gate 		ceen = p_clo_flags & EN_REG_CEEN;
1327815Sdilpreet 		nceen = p_clo_flags & EN_REG_NCEEN;
13280Sstevel@tonic-gate 	} else {
13290Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
13300Sstevel@tonic-gate 		ceen = clop->clo_flags & EN_REG_CEEN;
1331815Sdilpreet 		nceen = clop->clo_flags & EN_REG_NCEEN;
13320Sstevel@tonic-gate 	}
13330Sstevel@tonic-gate 
13340Sstevel@tonic-gate 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1335815Sdilpreet 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
13360Sstevel@tonic-gate }
13370Sstevel@tonic-gate 
13380Sstevel@tonic-gate /*
13390Sstevel@tonic-gate  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
13400Sstevel@tonic-gate  * ECC at TL>0.  Need to supply either an error register pointer or a
13410Sstevel@tonic-gate  * cpu logout structure pointer.
13420Sstevel@tonic-gate  */
13430Sstevel@tonic-gate static void
13440Sstevel@tonic-gate cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1345815Sdilpreet     uint64_t nceen, ch_cpu_logout_t *clop)
13460Sstevel@tonic-gate {
13470Sstevel@tonic-gate 	struct async_flt *aflt;
13480Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
13490Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
13500Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
13510Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
13540Sstevel@tonic-gate 	/*
13550Sstevel@tonic-gate 	 * If no cpu logout data, then we will have to make do without
13560Sstevel@tonic-gate 	 * any detailed logout information.
13570Sstevel@tonic-gate 	 */
13580Sstevel@tonic-gate 	if (clop == NULL) {
13590Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
13600Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
13610Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
13620Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
13630Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
13640Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
13650Sstevel@tonic-gate #if defined(SERRANO)
13660Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
13670Sstevel@tonic-gate #endif	/* SERRANO */
13680Sstevel@tonic-gate 	} else {
13690Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
13700Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
13710Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
13720Sstevel@tonic-gate #if defined(SERRANO)
13730Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
13740Sstevel@tonic-gate #endif	/* SERRANO */
13750Sstevel@tonic-gate 	}
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate 	/*
13780Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
13790Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
13800Sstevel@tonic-gate 	 * sticky bits.
13810Sstevel@tonic-gate 	 */
13820Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
13830Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
13840Sstevel@tonic-gate 	pr_reason[0] = '\0';
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	/* Setup the async fault structure */
13870Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
13880Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
13890Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
13900Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
13910Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
13920Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
13930Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
13940Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
13950Sstevel@tonic-gate 	aflt->flt_pc = tpc;
13960Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
13970Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
13980Sstevel@tonic-gate 	aflt->flt_priv = priv;
13990Sstevel@tonic-gate 	aflt->flt_tl = tl;
14000Sstevel@tonic-gate 	aflt->flt_status = ECC_F_TRAP;
14010Sstevel@tonic-gate 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
14020Sstevel@tonic-gate 
14030Sstevel@tonic-gate 	/*
14040Sstevel@tonic-gate 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
14050Sstevel@tonic-gate 	 * cmn_err messages out to the console.  The situation is a UCU (in
14060Sstevel@tonic-gate 	 * priv mode) which causes a WDU which causes a UE (on the retry).
14070Sstevel@tonic-gate 	 * The messages for the UCU and WDU are enqueued and then pulled off
14080Sstevel@tonic-gate 	 * the async queue via softint and syslogd starts to process them
14090Sstevel@tonic-gate 	 * but doesn't get them to the console.  The UE causes a panic, but
14100Sstevel@tonic-gate 	 * since the UCU/WDU messages are already in transit, those aren't
14110Sstevel@tonic-gate 	 * on the async queue.  The hack is to check if we have a matching
14120Sstevel@tonic-gate 	 * WDU event for the UCU, and if it matches, we're more than likely
14130Sstevel@tonic-gate 	 * going to panic with a UE, unless we're under protection.  So, we
14140Sstevel@tonic-gate 	 * check to see if we got a matching WDU event and if we're under
14150Sstevel@tonic-gate 	 * protection.
14160Sstevel@tonic-gate 	 *
14170Sstevel@tonic-gate 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
14180Sstevel@tonic-gate 	 * looks like this:
14190Sstevel@tonic-gate 	 *    UCU->WDU->UE
14200Sstevel@tonic-gate 	 * For Panther, it could look like either of these:
14210Sstevel@tonic-gate 	 *    UCU---->WDU->L3_WDU->UE
14220Sstevel@tonic-gate 	 *    L3_UCU->WDU->L3_WDU->UE
14230Sstevel@tonic-gate 	 */
14240Sstevel@tonic-gate 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
14250Sstevel@tonic-gate 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
14260Sstevel@tonic-gate 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
14270Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
14280Sstevel@tonic-gate 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
14290Sstevel@tonic-gate 		    (cpu_error_regs.afar == t_afar));
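		/*
		 * When there was no logout area, t_afsr_errs was derived
		 * from the live error registers at entry, so a WDU folded
		 * into t_afsr_errs also counts as a match.
		 */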
14300Sstevel@tonic-gate 		aflt->flt_panic |= ((clop == NULL) &&
14310Sstevel@tonic-gate 		    (t_afsr_errs & C_AFSR_WDU));
14320Sstevel@tonic-gate 	}
14330Sstevel@tonic-gate 
14340Sstevel@tonic-gate 	/*
14350Sstevel@tonic-gate 	 * Queue events on the async event queue, one event per error bit.
14360Sstevel@tonic-gate 	 * If no events are queued or no Fast ECC events are on in the AFSR,
14370Sstevel@tonic-gate 	 * queue an event to complain.
14380Sstevel@tonic-gate 	 */
14390Sstevel@tonic-gate 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
14400Sstevel@tonic-gate 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
14410Sstevel@tonic-gate 		ch_flt.flt_type = CPU_INV_AFSR;
14420Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
14430Sstevel@tonic-gate 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
14440Sstevel@tonic-gate 		    aflt->flt_panic);
14450Sstevel@tonic-gate 	}
14460Sstevel@tonic-gate 
14470Sstevel@tonic-gate 	/*
14480Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
14490Sstevel@tonic-gate 	 */
14500Sstevel@tonic-gate 	if (clop) {
14510Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
14520Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
14530Sstevel@tonic-gate 	}
14540Sstevel@tonic-gate 
14550Sstevel@tonic-gate 	/*
14560Sstevel@tonic-gate 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
14570Sstevel@tonic-gate 	 * or disrupting errors have happened.  We do this because if a
14580Sstevel@tonic-gate 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
14590Sstevel@tonic-gate 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
14600Sstevel@tonic-gate 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
14610Sstevel@tonic-gate 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
14620Sstevel@tonic-gate 	 * deferred or disrupting error happening between checking the AFSR and
14630Sstevel@tonic-gate 	 * enabling NCEEN/CEEN.
14640Sstevel@tonic-gate 	 *
1465815Sdilpreet 	 * Note: CEEN and NCEEN are only re-enabled if they were on when
1466815Sdilpreet 	 * the trap was taken.
1467815Sdilpreet 	 */
1468815Sdilpreet 	set_error_enable(get_error_enable() | (nceen | ceen));
14690Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
14700Sstevel@tonic-gate 		aflt->flt_panic |= ((ch_flt.afsr_errs &
14710Sstevel@tonic-gate 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
14720Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
14730Sstevel@tonic-gate 		    NULL);
14740Sstevel@tonic-gate 	}
14750Sstevel@tonic-gate 
14760Sstevel@tonic-gate 	/*
14770Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
14780Sstevel@tonic-gate 	 * be logged as part of the panic flow.
14790Sstevel@tonic-gate 	 */
14800Sstevel@tonic-gate 	if (aflt->flt_panic)
14810Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
14820Sstevel@tonic-gate 
14830Sstevel@tonic-gate 	/*
14840Sstevel@tonic-gate 	 * Flushing the Ecache here gets the part of the trap handler that
14850Sstevel@tonic-gate 	 * is run at TL=1 out of the Ecache.
14860Sstevel@tonic-gate 	 */
14870Sstevel@tonic-gate 	cpu_flush_ecache();
14880Sstevel@tonic-gate }
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate /*
14910Sstevel@tonic-gate  * This is called via sys_trap from pil15_interrupt code if the
14920Sstevel@tonic-gate  * corresponding entry in ch_err_tl1_pending is set.  Checks the
14930Sstevel@tonic-gate  * various ch_err_tl1_data structures for valid entries based on the bit
14940Sstevel@tonic-gate  * settings in the ch_err_tl1_flags entry of the structure.
14950Sstevel@tonic-gate  */
14960Sstevel@tonic-gate /*ARGSUSED*/
14970Sstevel@tonic-gate void
14980Sstevel@tonic-gate cpu_tl1_error(struct regs *rp, int panic)
14990Sstevel@tonic-gate {
15000Sstevel@tonic-gate 	ch_err_tl1_data_t *cl1p, cl1;
15010Sstevel@tonic-gate 	int i, ncl1ps;
15020Sstevel@tonic-gate 	uint64_t me_flags;
1503815Sdilpreet 	uint64_t ceen, nceen;
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
15060Sstevel@tonic-gate 		cl1p = &ch_err_tl1_data;
15070Sstevel@tonic-gate 		ncl1ps = 1;
15080Sstevel@tonic-gate 	} else if (CPU_PRIVATE(CPU) != NULL) {
15090Sstevel@tonic-gate 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
15100Sstevel@tonic-gate 		ncl1ps = CH_ERR_TL1_TLMAX;
15110Sstevel@tonic-gate 	} else {
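		/*
		 * Per-CPU logout areas exist but this CPU's private data
		 * pointer is not set up, so there is nothing we can scan.
		 */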
15120Sstevel@tonic-gate 		ncl1ps = 0;
15130Sstevel@tonic-gate 	}
15140Sstevel@tonic-gate 
15150Sstevel@tonic-gate 	for (i = 0; i < ncl1ps; i++, cl1p++) {
15160Sstevel@tonic-gate 		if (cl1p->ch_err_tl1_flags == 0)
15170Sstevel@tonic-gate 			continue;
15180Sstevel@tonic-gate 
15190Sstevel@tonic-gate 		/*
15200Sstevel@tonic-gate 		 * Grab a copy of the logout data and invalidate
15210Sstevel@tonic-gate 		 * the logout area.
15220Sstevel@tonic-gate 		 */
15230Sstevel@tonic-gate 		cl1 = *cl1p;
15240Sstevel@tonic-gate 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
15250Sstevel@tonic-gate 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
15260Sstevel@tonic-gate 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
15270Sstevel@tonic-gate 
15280Sstevel@tonic-gate 		/*
15290Sstevel@tonic-gate 		 * Log "first error" in ch_err_tl1_data.
15300Sstevel@tonic-gate 		 */
15310Sstevel@tonic-gate 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
15320Sstevel@tonic-gate 			ceen = get_error_enable() & EN_REG_CEEN;
1533815Sdilpreet 			nceen = get_error_enable() & EN_REG_NCEEN;
15340Sstevel@tonic-gate 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1535815Sdilpreet 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
15360Sstevel@tonic-gate 		}
15370Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
15380Sstevel@tonic-gate 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
15390Sstevel@tonic-gate 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
15400Sstevel@tonic-gate 			    (caddr_t)cl1.ch_err_tl1_tpc);
15410Sstevel@tonic-gate 		}
15420Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate 		/*
15450Sstevel@tonic-gate 		 * Log "multiple events" in ch_err_tl1_data.  Note that
15460Sstevel@tonic-gate 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
15470Sstevel@tonic-gate 		 * if the structure is busy; we just do the required cache
15480Sstevel@tonic-gate 		 * flushing and then retry.  So the AFSR/AFAR
15490Sstevel@tonic-gate 		 * at this point *should* have some relevant info.  If there
15500Sstevel@tonic-gate 		 * are no valid errors in the AFSR, we'll assume they've
15510Sstevel@tonic-gate 		 * already been picked up and logged.  For I$/D$ parity,
15520Sstevel@tonic-gate 		 * we just log an event with an "Unknown" (NULL) TPC.
15530Sstevel@tonic-gate 		 */
15540Sstevel@tonic-gate 		if (me_flags & CH_ERR_FECC) {
15550Sstevel@tonic-gate 			ch_cpu_errors_t cpu_error_regs;
15560Sstevel@tonic-gate 			uint64_t t_afsr_errs;
15570Sstevel@tonic-gate 
15580Sstevel@tonic-gate 			/*
15590Sstevel@tonic-gate 			 * Get the error registers and see if there's
15600Sstevel@tonic-gate 			 * a pending error.  If not, don't bother
15610Sstevel@tonic-gate 			 * generating an "Invalid AFSR" error event.
15620Sstevel@tonic-gate 			 */
15630Sstevel@tonic-gate 			get_cpu_error_state(&cpu_error_regs);
15640Sstevel@tonic-gate 			t_afsr_errs = (cpu_error_regs.afsr_ext &
15650Sstevel@tonic-gate 			    C_AFSR_EXT_ALL_ERRS) |
15660Sstevel@tonic-gate 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
15670Sstevel@tonic-gate 			if (t_afsr_errs != 0) {
15680Sstevel@tonic-gate 				ceen = get_error_enable() & EN_REG_CEEN;
1569815Sdilpreet 				nceen = get_error_enable() & EN_REG_NCEEN;
15700Sstevel@tonic-gate 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1571815Sdilpreet 				    1, ceen, nceen, NULL);
15720Sstevel@tonic-gate 			}
15730Sstevel@tonic-gate 		}
15740Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
15750Sstevel@tonic-gate 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
15760Sstevel@tonic-gate 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
15770Sstevel@tonic-gate 		}
15780Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
15790Sstevel@tonic-gate 	}
15800Sstevel@tonic-gate }
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate /*
15830Sstevel@tonic-gate  * Called from Fast ECC TL>0 handler in case of fatal error.
15840Sstevel@tonic-gate  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
15850Sstevel@tonic-gate  * but if we don't, we'll panic with something reasonable.
15860Sstevel@tonic-gate  */
15870Sstevel@tonic-gate /*ARGSUSED*/
15880Sstevel@tonic-gate void
15890Sstevel@tonic-gate cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
15900Sstevel@tonic-gate {
15910Sstevel@tonic-gate 	cpu_tl1_error(rp, 1);
15920Sstevel@tonic-gate 	/*
15930Sstevel@tonic-gate 	 * Should never return, but just in case.
15940Sstevel@tonic-gate 	 */
15950Sstevel@tonic-gate 	fm_panic("Unsurvivable ECC Error at TL>0");
15960Sstevel@tonic-gate }
15970Sstevel@tonic-gate 
15980Sstevel@tonic-gate /*
15990Sstevel@tonic-gate  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
16000Sstevel@tonic-gate  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
16010Sstevel@tonic-gate  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
16020Sstevel@tonic-gate  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
16030Sstevel@tonic-gate  *
16040Sstevel@tonic-gate  * Cheetah+ also handles (No additional processing required):
16050Sstevel@tonic-gate  *    DUE, DTO, DBERR	(NCEEN controlled)
16060Sstevel@tonic-gate  *    THCE		(CEEN and ET_ECC_en controlled)
16070Sstevel@tonic-gate  *    TUE		(ET_ECC_en controlled)
16080Sstevel@tonic-gate  *
16090Sstevel@tonic-gate  * Panther further adds:
16100Sstevel@tonic-gate  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
16110Sstevel@tonic-gate  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
16120Sstevel@tonic-gate  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
16130Sstevel@tonic-gate  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
16140Sstevel@tonic-gate  *    THCE			(CEEN and L2_tag_ECC_en controlled)
16150Sstevel@tonic-gate  *    L3_THCE			(CEEN and ET_ECC_en controlled)
16160Sstevel@tonic-gate  *
16170Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
16180Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
16190Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
16200Sstevel@tonic-gate  */
16210Sstevel@tonic-gate /*ARGSUSED*/
16220Sstevel@tonic-gate void
16230Sstevel@tonic-gate cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
16240Sstevel@tonic-gate {
16250Sstevel@tonic-gate 	struct async_flt *aflt;
16260Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
16270Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
16280Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
16290Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
16300Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
16330Sstevel@tonic-gate 	/*
16340Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
16350Sstevel@tonic-gate 	 * pointer, then we will have to make do without any detailed
16360Sstevel@tonic-gate 	 * logout information.
16370Sstevel@tonic-gate 	 */
16380Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
16390Sstevel@tonic-gate 		clop = NULL;
16400Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
16410Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
16420Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
16430Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
16440Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
16450Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
16460Sstevel@tonic-gate #if defined(SERRANO)
16470Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
16480Sstevel@tonic-gate #endif	/* SERRANO */
16490Sstevel@tonic-gate 	} else {
16500Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
16510Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
16520Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
16530Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
16540Sstevel@tonic-gate #if defined(SERRANO)
16550Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
16560Sstevel@tonic-gate #endif	/* SERRANO */
16570Sstevel@tonic-gate 	}
16580Sstevel@tonic-gate 
16590Sstevel@tonic-gate 	/*
16600Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
16610Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
16620Sstevel@tonic-gate 	 * sticky bits.
16630Sstevel@tonic-gate 	 */
16640Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
16650Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
16660Sstevel@tonic-gate 
16670Sstevel@tonic-gate 	pr_reason[0] = '\0';
16680Sstevel@tonic-gate 	/* Setup the async fault structure */
16690Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
16700Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
16710Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
16720Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
16730Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
16740Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
16750Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
16760Sstevel@tonic-gate 	aflt->flt_tl = 0;
16770Sstevel@tonic-gate 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 	/*
16800Sstevel@tonic-gate 	 * If this trap is a result of one of the errors not masked
16810Sstevel@tonic-gate 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
16820Sstevel@tonic-gate 	 * indicate that a timeout is to be set later.
16830Sstevel@tonic-gate 	 */
16840Sstevel@tonic-gate 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
16850Sstevel@tonic-gate 	    !aflt->flt_panic)
16860Sstevel@tonic-gate 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
16870Sstevel@tonic-gate 	else
16880Sstevel@tonic-gate 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
16890Sstevel@tonic-gate 
16900Sstevel@tonic-gate 	/*
16910Sstevel@tonic-gate 	 * log the CE and clean up
16920Sstevel@tonic-gate 	 */
16930Sstevel@tonic-gate 	cpu_log_and_clear_ce(&ch_flt);
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 	/*
16960Sstevel@tonic-gate 	 * We re-enable CEEN (if required) and check if any disrupting errors
16970Sstevel@tonic-gate 	 * have happened.  We do this because if a disrupting error had occurred
16980Sstevel@tonic-gate 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
16990Sstevel@tonic-gate 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
17000Sstevel@tonic-gate 	 * we enable CEEN *before* checking the AFSR to avoid the small window
17010Sstevel@tonic-gate 	 * of an error happening between checking the AFSR and enabling CEEN.
17020Sstevel@tonic-gate 	 */
17030Sstevel@tonic-gate 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
17040Sstevel@tonic-gate 		set_error_enable(get_error_enable() | EN_REG_CEEN);
17050Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
17060Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
17070Sstevel@tonic-gate 		    NULL);
17080Sstevel@tonic-gate 	}
17090Sstevel@tonic-gate 
17100Sstevel@tonic-gate 	/*
17110Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
17120Sstevel@tonic-gate 	 * be logged as part of the panic flow.
17130Sstevel@tonic-gate 	 */
17140Sstevel@tonic-gate 	if (aflt->flt_panic)
17150Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate 
17180Sstevel@tonic-gate /*
17190Sstevel@tonic-gate  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
17200Sstevel@tonic-gate  * L3_EDU:BLD, TO, and BERR events.
17210Sstevel@tonic-gate  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
17220Sstevel@tonic-gate  *
17230Sstevel@tonic-gate  * Cheetah+: No additional errors handled.
17240Sstevel@tonic-gate  *
17250Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
17260Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
17270Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
17280Sstevel@tonic-gate  */
17290Sstevel@tonic-gate /*ARGSUSED*/
17300Sstevel@tonic-gate void
17310Sstevel@tonic-gate cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
17320Sstevel@tonic-gate {
17330Sstevel@tonic-gate 	ushort_t ttype, tl;
17340Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
17350Sstevel@tonic-gate 	struct async_flt *aflt;
17360Sstevel@tonic-gate 	int trampolined = 0;
17370Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
17380Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
17390Sstevel@tonic-gate 	uint64_t ceen, clo_flags;
17400Sstevel@tonic-gate 	uint64_t log_afsr;
17410Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
17420Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
17430Sstevel@tonic-gate 	int expected = DDI_FM_ERR_UNEXPECTED;
17440Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
17450Sstevel@tonic-gate 
17460Sstevel@tonic-gate 	/*
17470Sstevel@tonic-gate 	 * We need to look at p_flag to determine if the thread detected an
17480Sstevel@tonic-gate 	 * error while dumping core.  We can't grab p_lock here, but it's ok
17490Sstevel@tonic-gate 	 * because we just need a consistent snapshot and we know that everyone
17500Sstevel@tonic-gate 	 * else will store a consistent set of bits while holding p_lock.  We
17510Sstevel@tonic-gate 	 * don't have to worry about a race because SDOCORE is set once prior
17520Sstevel@tonic-gate 	 * to doing i/o from the process's address space and is never cleared.
17530Sstevel@tonic-gate 	 */
17540Sstevel@tonic-gate 	uint_t pflag = ttoproc(curthread)->p_flag;
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
17570Sstevel@tonic-gate 	/*
17580Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
17590Sstevel@tonic-gate 	 * pointer, then we will have to make do without any detailed
17600Sstevel@tonic-gate 	 * logout information.
17610Sstevel@tonic-gate 	 */
17620Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
17630Sstevel@tonic-gate 		clop = NULL;
17640Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
17650Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
17660Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
17670Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
17680Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
17690Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
17700Sstevel@tonic-gate #if defined(SERRANO)
17710Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
17720Sstevel@tonic-gate #endif	/* SERRANO */
17730Sstevel@tonic-gate 		clo_flags = p_clo_flags;
17740Sstevel@tonic-gate 	} else {
17750Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
17760Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
17770Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
17780Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
17790Sstevel@tonic-gate #if defined(SERRANO)
17800Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
17810Sstevel@tonic-gate #endif	/* SERRANO */
17820Sstevel@tonic-gate 		clo_flags = clop->clo_flags;
17830Sstevel@tonic-gate 	}
17840Sstevel@tonic-gate 
17850Sstevel@tonic-gate 	/*
17860Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
17870Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
17880Sstevel@tonic-gate 	 * sticky bits.
17890Sstevel@tonic-gate 	 */
17900Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
17910Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
17920Sstevel@tonic-gate 	pr_reason[0] = '\0';
17930Sstevel@tonic-gate 
17940Sstevel@tonic-gate 	/*
17950Sstevel@tonic-gate 	 * Grab information encoded into our clo_flags field.
17960Sstevel@tonic-gate 	 */
17970Sstevel@tonic-gate 	ceen = clo_flags & EN_REG_CEEN;
17980Sstevel@tonic-gate 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
17990Sstevel@tonic-gate 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 	/*
18020Sstevel@tonic-gate 	 * handle the specific error
18030Sstevel@tonic-gate 	 */
18040Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
18050Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
18060Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
18070Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
18080Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
18090Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
18100Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
18110Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
18120Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
18130Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
18140Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
18150Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
18160Sstevel@tonic-gate 	aflt->flt_tl = (uchar_t)tl;
18170Sstevel@tonic-gate 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
18180Sstevel@tonic-gate 	    C_AFSR_PANIC(t_afsr_errs));
18190Sstevel@tonic-gate 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
18200Sstevel@tonic-gate 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
18210Sstevel@tonic-gate 
18220Sstevel@tonic-gate 	/*
18230Sstevel@tonic-gate 	 * If the trap occurred in privileged mode at TL=0, we need to check to
18240Sstevel@tonic-gate 	 * see if we were executing in the kernel under on_trap() or t_lofault
18250Sstevel@tonic-gate 	 * protection.  If so, modify the saved registers so that we return
18260Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine.
18270Sstevel@tonic-gate 	 */
18280Sstevel@tonic-gate 	if (aflt->flt_priv && tl == 0) {
18290Sstevel@tonic-gate 		if (curthread->t_ontrap != NULL) {
18300Sstevel@tonic-gate 			on_trap_data_t *otp = curthread->t_ontrap;
18310Sstevel@tonic-gate 
18320Sstevel@tonic-gate 			if (otp->ot_prot & OT_DATA_EC) {
18330Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_EC;
18340Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_EC;
18350Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
18360Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
18370Sstevel@tonic-gate 				trampolined = 1;
18380Sstevel@tonic-gate 			}
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
18410Sstevel@tonic-gate 			    (otp->ot_prot & OT_DATA_ACCESS)) {
18420Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_ACCESS;
18430Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_ACCESS;
18440Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
18450Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
18460Sstevel@tonic-gate 				trampolined = 1;
18470Sstevel@tonic-gate 				/*
18480Sstevel@tonic-gate 				 * for peeks and caut_gets, errors are expected
18490Sstevel@tonic-gate 				 */
18500Sstevel@tonic-gate 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
18510Sstevel@tonic-gate 				if (!hp)
18520Sstevel@tonic-gate 					expected = DDI_FM_ERR_PEEK;
18530Sstevel@tonic-gate 				else if (hp->ah_acc.devacc_attr_access ==
18540Sstevel@tonic-gate 				    DDI_CAUTIOUS_ACC)
18550Sstevel@tonic-gate 					expected = DDI_FM_ERR_EXPECTED;
18560Sstevel@tonic-gate 			}
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 		} else if (curthread->t_lofault) {
18590Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_COPY;
18600Sstevel@tonic-gate 			rp->r_g1 = EFAULT;
18610Sstevel@tonic-gate 			rp->r_pc = curthread->t_lofault;
18620Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
18630Sstevel@tonic-gate 			trampolined = 1;
18640Sstevel@tonic-gate 		}
18650Sstevel@tonic-gate 	}
18660Sstevel@tonic-gate 
18670Sstevel@tonic-gate 	/*
18680Sstevel@tonic-gate 	 * If we're in user mode or we're doing a protected copy, we either
18690Sstevel@tonic-gate 	 * want the ASTON code below to send a signal to the user process
18700Sstevel@tonic-gate 	 * or we want to panic if aft_panic is set.
18710Sstevel@tonic-gate 	 *
18720Sstevel@tonic-gate 	 * If we're in privileged mode and we're not doing a copy, then we
18730Sstevel@tonic-gate 	 * need to check if we've trampolined.  If we haven't trampolined,
18740Sstevel@tonic-gate 	 * we should panic.
18750Sstevel@tonic-gate 	 */
18760Sstevel@tonic-gate 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
18770Sstevel@tonic-gate 		if (t_afsr_errs &
18780Sstevel@tonic-gate 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
18790Sstevel@tonic-gate 		    ~(C_AFSR_BERR | C_AFSR_TO)))
18800Sstevel@tonic-gate 			aflt->flt_panic |= aft_panic;
18810Sstevel@tonic-gate 	} else if (!trampolined) {
18820Sstevel@tonic-gate 		aflt->flt_panic = 1;
18830Sstevel@tonic-gate 	}
18840Sstevel@tonic-gate 
18850Sstevel@tonic-gate 	/*
18860Sstevel@tonic-gate 	 * If we've trampolined due to a privileged TO or BERR, or if an
18870Sstevel@tonic-gate 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
18880Sstevel@tonic-gate 	 * event for that TO or BERR.  Queue all other events (if any) besides
18890Sstevel@tonic-gate 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
18900Sstevel@tonic-gate 	 * the TO/BERR.  Since we may not be enqueuing any events, we need to
18910Sstevel@tonic-gate 	 * to a TO or BERR, just enqueue events normally.
18920Sstevel@tonic-gate 	 */
18930Sstevel@tonic-gate 	log_afsr = t_afsr_errs;
18940Sstevel@tonic-gate 	if (trampolined) {
18950Sstevel@tonic-gate 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
18960Sstevel@tonic-gate 	} else if (!aflt->flt_priv) {
18970Sstevel@tonic-gate 		/*
18980Sstevel@tonic-gate 		 * User mode, suppress messages if
18990Sstevel@tonic-gate 		 * cpu_berr_to_verbose is not set.
19000Sstevel@tonic-gate 		 */
19010Sstevel@tonic-gate 		if (!cpu_berr_to_verbose)
19020Sstevel@tonic-gate 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
19030Sstevel@tonic-gate 	}
19040Sstevel@tonic-gate 
19050Sstevel@tonic-gate 	/*
19060Sstevel@tonic-gate 	 * Log any errors that occurred
19070Sstevel@tonic-gate 	 */
19080Sstevel@tonic-gate 	if (((log_afsr &
19090Sstevel@tonic-gate 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
19100Sstevel@tonic-gate 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
19110Sstevel@tonic-gate 	    (t_afsr_errs &
19120Sstevel@tonic-gate 	    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
19130Sstevel@tonic-gate 		ch_flt.flt_type = CPU_INV_AFSR;
19140Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
19150Sstevel@tonic-gate 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
19160Sstevel@tonic-gate 		    aflt->flt_panic);
19170Sstevel@tonic-gate 	}
19180Sstevel@tonic-gate 
19190Sstevel@tonic-gate 	/*
19200Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
19210Sstevel@tonic-gate 	 */
19220Sstevel@tonic-gate 	if (clop) {
19230Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
19240Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
19250Sstevel@tonic-gate 	}
19260Sstevel@tonic-gate 
19270Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
19280Sstevel@tonic-gate 	/*
19290Sstevel@tonic-gate 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
19300Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
19310Sstevel@tonic-gate 	 */
19320Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
19330Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
19340Sstevel@tonic-gate 	}
19350Sstevel@tonic-gate 
19360Sstevel@tonic-gate 	/*
19370Sstevel@tonic-gate 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
19380Sstevel@tonic-gate 	 * line from the Ecache.  We also need to query the bus nexus for
19390Sstevel@tonic-gate 	 * fatal errors.  Attempts to do diagnostic read on caches may
19400Sstevel@tonic-gate 	 * introduce more errors (especially when the module is bad).
19410Sstevel@tonic-gate 	 */
19420Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
19430Sstevel@tonic-gate 		/*
19440Sstevel@tonic-gate 		 * Ask our bus nexus friends if they have any fatal errors.  If
19450Sstevel@tonic-gate 		 * so, they will log appropriate error messages.
19460Sstevel@tonic-gate 		 */
19470Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
19480Sstevel@tonic-gate 			aflt->flt_panic = 1;
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate 		/*
19510Sstevel@tonic-gate 		 * We got a UE or RUE and are panicking, save the fault PA in
19520Sstevel@tonic-gate 		 * a known location so that the platform specific panic code
19530Sstevel@tonic-gate 		 * can check for copyback errors.
19540Sstevel@tonic-gate 		 */
19550Sstevel@tonic-gate 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
19560Sstevel@tonic-gate 			panic_aflt = *aflt;
19570Sstevel@tonic-gate 		}
19580Sstevel@tonic-gate 	}
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	/*
19610Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache
19620Sstevel@tonic-gate 	 */
19630Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
19640Sstevel@tonic-gate 		cpu_error_ecache_flush(&ch_flt);
19650Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
19660Sstevel@tonic-gate 	/*
19670Sstevel@tonic-gate 	 * UE/BERR/TO: Call our bus nexus friends to check for
19680Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
19690Sstevel@tonic-gate 	 */
19700Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
19710Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
19720Sstevel@tonic-gate 	}
19730Sstevel@tonic-gate 
19740Sstevel@tonic-gate 	/*
19750Sstevel@tonic-gate 	 * UE: If the UE is in memory, we need to flush the bad
19760Sstevel@tonic-gate 	 * line from the Ecache.  We also need to query the bus nexus for
19770Sstevel@tonic-gate 	 * fatal errors.  Attempts to do diagnostic read on caches may
19780Sstevel@tonic-gate 	 * introduce more errors (especially when the module is bad).
19790Sstevel@tonic-gate 	 */
19800Sstevel@tonic-gate 	if (t_afsr & C_AFSR_UE) {
19810Sstevel@tonic-gate 		/*
19820Sstevel@tonic-gate 		 * Ask our legacy bus nexus friends if they have any fatal
19830Sstevel@tonic-gate 		 * errors.  If so, they will log appropriate error messages.
19840Sstevel@tonic-gate 		 */
19850Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
19860Sstevel@tonic-gate 			aflt->flt_panic = 1;
19870Sstevel@tonic-gate 
19880Sstevel@tonic-gate 		/*
19890Sstevel@tonic-gate 		 * We got a UE and are panicking, save the fault PA in a known
19900Sstevel@tonic-gate 		 * location so that the platform specific panic code can check
19910Sstevel@tonic-gate 		 * for copyback errors.
19920Sstevel@tonic-gate 		 */
19930Sstevel@tonic-gate 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
19940Sstevel@tonic-gate 			panic_aflt = *aflt;
19950Sstevel@tonic-gate 		}
19960Sstevel@tonic-gate 	}
19970Sstevel@tonic-gate 
19980Sstevel@tonic-gate 	/*
19990Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache
20000Sstevel@tonic-gate 	 */
20010Sstevel@tonic-gate 	if (t_afsr_errs &
20020Sstevel@tonic-gate 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
20030Sstevel@tonic-gate 		cpu_error_ecache_flush(&ch_flt);
20040Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
20050Sstevel@tonic-gate 
20060Sstevel@tonic-gate 	/*
20070Sstevel@tonic-gate 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
20080Sstevel@tonic-gate 	 * or disrupting errors have happened.  We do this because if a
20090Sstevel@tonic-gate 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
20100Sstevel@tonic-gate 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
20110Sstevel@tonic-gate 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
20120Sstevel@tonic-gate 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
20130Sstevel@tonic-gate 	 * deferred or disrupting error happening between checking the AFSR and
20140Sstevel@tonic-gate 	 * enabling NCEEN/CEEN.
20150Sstevel@tonic-gate 	 *
20160Sstevel@tonic-gate 	 * Note: CEEN is only re-enabled if it was on when the trap was taken.
20170Sstevel@tonic-gate 	 */
20180Sstevel@tonic-gate 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
20190Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
20200Sstevel@tonic-gate 		/*
20210Sstevel@tonic-gate 		 * Check for secondary errors, and avoid panicking if we
20220Sstevel@tonic-gate 		 * have them
20230Sstevel@tonic-gate 		 */
20240Sstevel@tonic-gate 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
20250Sstevel@tonic-gate 		    t_afar) == 0) {
20260Sstevel@tonic-gate 			aflt->flt_panic |= ((ch_flt.afsr_errs &
20270Sstevel@tonic-gate 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
20280Sstevel@tonic-gate 		}
20290Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
20300Sstevel@tonic-gate 		    NULL);
20310Sstevel@tonic-gate 	}
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate 	/*
20340Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
20350Sstevel@tonic-gate 	 * be logged as part of the panic flow.
20360Sstevel@tonic-gate 	 */
20370Sstevel@tonic-gate 	if (aflt->flt_panic)
20380Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
20390Sstevel@tonic-gate 
20400Sstevel@tonic-gate 	/*
20410Sstevel@tonic-gate 	 * If we queued an error and we are going to return from the trap and
20420Sstevel@tonic-gate 	 * the error was in user mode or inside of a copy routine, set AST flag
20430Sstevel@tonic-gate 	 * so the queue will be drained before returning to user mode.  The
20440Sstevel@tonic-gate 	 * AST processing will also act on our failure policy.
20450Sstevel@tonic-gate 	 */
20460Sstevel@tonic-gate 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
20470Sstevel@tonic-gate 		int pcb_flag = 0;
20480Sstevel@tonic-gate 
20490Sstevel@tonic-gate 		if (t_afsr_errs &
20500Sstevel@tonic-gate 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
20510Sstevel@tonic-gate 		    ~(C_AFSR_BERR | C_AFSR_TO)))
20520Sstevel@tonic-gate 			pcb_flag |= ASYNC_HWERR;
20530Sstevel@tonic-gate 
20540Sstevel@tonic-gate 		if (t_afsr & C_AFSR_BERR)
20550Sstevel@tonic-gate 			pcb_flag |= ASYNC_BERR;
20560Sstevel@tonic-gate 
20570Sstevel@tonic-gate 		if (t_afsr & C_AFSR_TO)
20580Sstevel@tonic-gate 			pcb_flag |= ASYNC_BTO;
20590Sstevel@tonic-gate 
20600Sstevel@tonic-gate 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
20610Sstevel@tonic-gate 		aston(curthread);
20620Sstevel@tonic-gate 	}
20630Sstevel@tonic-gate }
20640Sstevel@tonic-gate 
20650Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
20660Sstevel@tonic-gate /*
20670Sstevel@tonic-gate  * Handling of data and instruction parity errors (traps 0x71, 0x72).
20680Sstevel@tonic-gate  *
20690Sstevel@tonic-gate  * For Panther, P$ data parity errors during floating point load hits
20700Sstevel@tonic-gate  * are also detected (reported as TT 0x71) and handled by this trap
20710Sstevel@tonic-gate  * handler.
20720Sstevel@tonic-gate  *
20730Sstevel@tonic-gate  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
20740Sstevel@tonic-gate  * is available.
20750Sstevel@tonic-gate  */
20760Sstevel@tonic-gate /*ARGSUSED*/
20770Sstevel@tonic-gate void
20780Sstevel@tonic-gate cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
20790Sstevel@tonic-gate {
20800Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
20810Sstevel@tonic-gate 	struct async_flt *aflt;
20820Sstevel@tonic-gate 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
20830Sstevel@tonic-gate 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
20840Sstevel@tonic-gate 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
20850Sstevel@tonic-gate 	char *error_class;
20860Sstevel@tonic-gate 
20870Sstevel@tonic-gate 	/*
20880Sstevel@tonic-gate 	 * Log the error.
20890Sstevel@tonic-gate 	 * For icache parity errors the fault address is the trap PC.
20900Sstevel@tonic-gate 	 * For dcache/pcache parity errors the instruction would have to
20910Sstevel@tonic-gate 	 * be decoded to determine the address and that isn't possible
20920Sstevel@tonic-gate 	 * at high PIL.
20930Sstevel@tonic-gate 	 */
20940Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
20950Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
20960Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
20970Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
20980Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
20990Sstevel@tonic-gate 	aflt->flt_pc = tpc;
21000Sstevel@tonic-gate 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
21010Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
21020Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
21030Sstevel@tonic-gate 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
21040Sstevel@tonic-gate 	aflt->flt_tl = tl;
21050Sstevel@tonic-gate 	aflt->flt_panic = panic;
21060Sstevel@tonic-gate 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
21070Sstevel@tonic-gate 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
21080Sstevel@tonic-gate 
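	/*
	 * Pick the ereport class based on what the parity scan finds: a
	 * recorded offset selects the data-parity class, a recorded way
	 * selects the tag-parity class, and otherwise a generic parity
	 * error is reported.
	 */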
21090Sstevel@tonic-gate 	if (iparity) {
21100Sstevel@tonic-gate 		cpu_icache_parity_info(&ch_flt);
21110Sstevel@tonic-gate 		if (ch_flt.parity_data.ipe.cpl_off != -1)
21120Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
21130Sstevel@tonic-gate 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
21140Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
21150Sstevel@tonic-gate 		else
21160Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_IPE;
21170Sstevel@tonic-gate 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
21180Sstevel@tonic-gate 	} else {
21190Sstevel@tonic-gate 		cpu_dcache_parity_info(&ch_flt);
21200Sstevel@tonic-gate 		if (ch_flt.parity_data.dpe.cpl_off != -1)
21210Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
21220Sstevel@tonic-gate 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
21230Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
21240Sstevel@tonic-gate 		else
21250Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DPE;
21260Sstevel@tonic-gate 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
21270Sstevel@tonic-gate 		/*
21280Sstevel@tonic-gate 		 * For panther we also need to check the P$ for parity errors.
21290Sstevel@tonic-gate 		 */
21300Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
21310Sstevel@tonic-gate 			cpu_pcache_parity_info(&ch_flt);
21320Sstevel@tonic-gate 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
21330Sstevel@tonic-gate 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
21340Sstevel@tonic-gate 				aflt->flt_payload =
21350Sstevel@tonic-gate 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
21360Sstevel@tonic-gate 			}
21370Sstevel@tonic-gate 		}
21380Sstevel@tonic-gate 	}
21390Sstevel@tonic-gate 
21400Sstevel@tonic-gate 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
21410Sstevel@tonic-gate 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
21420Sstevel@tonic-gate 
21430Sstevel@tonic-gate 	if (iparity) {
21440Sstevel@tonic-gate 		/*
21450Sstevel@tonic-gate 		 * Invalidate entire I$.
21460Sstevel@tonic-gate 		 * This is required due to the use of diagnostic ASI
21470Sstevel@tonic-gate 		 * accesses that may result in a loss of I$ coherency.
21480Sstevel@tonic-gate 		 */
21490Sstevel@tonic-gate 		if (cache_boot_state & DCU_IC) {
21500Sstevel@tonic-gate 			flush_icache();
21510Sstevel@tonic-gate 		}
21520Sstevel@tonic-gate 		/*
21530Sstevel@tonic-gate 		 * According to section P.3.1 of the Panther PRM, we
21540Sstevel@tonic-gate 		 * need to do a little more for recovery on those
21550Sstevel@tonic-gate 		 * CPUs after encountering an I$ parity error.
21560Sstevel@tonic-gate 		 */
21570Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
21580Sstevel@tonic-gate 			flush_ipb();
21590Sstevel@tonic-gate 			correct_dcache_parity(dcache_size,
21600Sstevel@tonic-gate 			    dcache_linesize);
21610Sstevel@tonic-gate 			flush_pcache();
21620Sstevel@tonic-gate 		}
21630Sstevel@tonic-gate 	} else {
21640Sstevel@tonic-gate 		/*
21650Sstevel@tonic-gate 		 * Since the valid bit is ignored when checking parity, the
21660Sstevel@tonic-gate 		 * D$ data and tag must also be corrected.  Set the D$ data
21670Sstevel@tonic-gate 		 * bits to zero and the utags to 0, 1, 2, 3.
21680Sstevel@tonic-gate 		 */
21690Sstevel@tonic-gate 		correct_dcache_parity(dcache_size, dcache_linesize);
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 		/*
21720Sstevel@tonic-gate 		 * According to section P.3.3 of the Panther PRM, we
21730Sstevel@tonic-gate 		 * need to do a little more for recovery on those
21740Sstevel@tonic-gate 		 * CPUs after encountering a D$ or P$ parity error.
21750Sstevel@tonic-gate 		 *
21760Sstevel@tonic-gate 		 * As far as clearing P$ parity errors, it is enough to
21770Sstevel@tonic-gate 		 * simply invalidate all entries in the P$ since P$ parity
21780Sstevel@tonic-gate 		 * error traps are only generated for floating point load
21790Sstevel@tonic-gate 		 * hits.
21800Sstevel@tonic-gate 		 */
21810Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
21820Sstevel@tonic-gate 			flush_icache();
21830Sstevel@tonic-gate 			flush_ipb();
21840Sstevel@tonic-gate 			flush_pcache();
21850Sstevel@tonic-gate 		}
21860Sstevel@tonic-gate 	}
21870Sstevel@tonic-gate 
21880Sstevel@tonic-gate 	/*
21890Sstevel@tonic-gate 	 * Invalidate entire D$ if it was enabled.
21900Sstevel@tonic-gate 	 * This is done to avoid stale data in the D$ which might
21910Sstevel@tonic-gate 	 * occur with the D$ disabled and the trap handler doing
21920Sstevel@tonic-gate 	 * stores affecting lines already in the D$.
21930Sstevel@tonic-gate 	 */
21940Sstevel@tonic-gate 	if (cache_boot_state & DCU_DC) {
21950Sstevel@tonic-gate 		flush_dcache();
21960Sstevel@tonic-gate 	}
21970Sstevel@tonic-gate 
21980Sstevel@tonic-gate 	/*
21990Sstevel@tonic-gate 	 * Restore caches to their bootup state.
22000Sstevel@tonic-gate 	 */
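22000Sstevel@tonic-gate 	/*
22000Sstevel@tonic-gate 	 * cache_boot_state holds the DCU cache-enable bits that were set
22000Sstevel@tonic-gate 	 * at boot, so OR-ing them back in undoes any disabling done while
22000Sstevel@tonic-gate 	 * the parity error was being handled.
22000Sstevel@tonic-gate 	 */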
22010Sstevel@tonic-gate 	set_dcu(get_dcu() | cache_boot_state);
22020Sstevel@tonic-gate 
22030Sstevel@tonic-gate 	/*
22040Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
22050Sstevel@tonic-gate 	 * be logged as part of the panic flow.
22060Sstevel@tonic-gate 	 */
22070Sstevel@tonic-gate 	if (aflt->flt_panic)
22080Sstevel@tonic-gate 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
22090Sstevel@tonic-gate 
22100Sstevel@tonic-gate 	/*
22110Sstevel@tonic-gate 	 * If this error occurred at TL>0 then flush the E$ here to reduce
22120Sstevel@tonic-gate 	 * the chance of getting an unrecoverable Fast ECC error.  This
22130Sstevel@tonic-gate 	 * flush will evict the part of the parity trap handler that is run
22140Sstevel@tonic-gate 	 * at TL>1.
22150Sstevel@tonic-gate 	 */
22160Sstevel@tonic-gate 	if (tl) {
22170Sstevel@tonic-gate 		cpu_flush_ecache();
22180Sstevel@tonic-gate 	}
22190Sstevel@tonic-gate }
22200Sstevel@tonic-gate 
22210Sstevel@tonic-gate /*
22220Sstevel@tonic-gate  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
22230Sstevel@tonic-gate  * to indicate which portions of the captured data should be in the ereport.
22240Sstevel@tonic-gate  */
22250Sstevel@tonic-gate void
22260Sstevel@tonic-gate cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
22270Sstevel@tonic-gate {
22280Sstevel@tonic-gate 	int way = ch_flt->parity_data.ipe.cpl_way;
22290Sstevel@tonic-gate 	int offset = ch_flt->parity_data.ipe.cpl_off;
22300Sstevel@tonic-gate 	int tag_index;
22310Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
22320Sstevel@tonic-gate 
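22330Sstevel@tonic-gate 	/*
22330Sstevel@tonic-gate 	 * Note: the ic_idx captured at trap time encodes the way as well
22330Sstevel@tonic-gate 	 * as the index; the PN_/CH_ICIDX_TO_WAY() macros below recover the
22330Sstevel@tonic-gate 	 * way (the encoding differs between Panther and earlier I$
22330Sstevel@tonic-gate 	 * geometries, hence the IS_PANTHER() check).
22330Sstevel@tonic-gate 	 */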
22340Sstevel@tonic-gate 	if ((offset != -1) || (way != -1)) {
22350Sstevel@tonic-gate 		/*
22360Sstevel@tonic-gate 		 * Parity error in I$ tag or data
22370Sstevel@tonic-gate 		 */
22380Sstevel@tonic-gate 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
22390Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
22400Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
22410Sstevel@tonic-gate 			    PN_ICIDX_TO_WAY(tag_index);
22420Sstevel@tonic-gate 		else
22430Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
22440Sstevel@tonic-gate 			    CH_ICIDX_TO_WAY(tag_index);
22450Sstevel@tonic-gate 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
22460Sstevel@tonic-gate 		    IC_LOGFLAG_MAGIC;
22470Sstevel@tonic-gate 	} else {
22480Sstevel@tonic-gate 		/*
22490Sstevel@tonic-gate 		 * Parity error was not identified.
22500Sstevel@tonic-gate 		 * Log tags and data for all ways.
22510Sstevel@tonic-gate 		 */
22520Sstevel@tonic-gate 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
22530Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
22540Sstevel@tonic-gate 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
22550Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
22560Sstevel@tonic-gate 				    PN_ICIDX_TO_WAY(tag_index);
22570Sstevel@tonic-gate 			else
22580Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
22590Sstevel@tonic-gate 				    CH_ICIDX_TO_WAY(tag_index);
22600Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
22610Sstevel@tonic-gate 			    IC_LOGFLAG_MAGIC;
22620Sstevel@tonic-gate 		}
22630Sstevel@tonic-gate 	}
22640Sstevel@tonic-gate }
22650Sstevel@tonic-gate 
22660Sstevel@tonic-gate /*
22670Sstevel@tonic-gate  * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
22680Sstevel@tonic-gate  * to indicate which portions of the captured data should be in the ereport.
22690Sstevel@tonic-gate  */
22700Sstevel@tonic-gate void
22710Sstevel@tonic-gate cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
22720Sstevel@tonic-gate {
22730Sstevel@tonic-gate 	int way = ch_flt->parity_data.dpe.cpl_way;
22740Sstevel@tonic-gate 	int offset = ch_flt->parity_data.dpe.cpl_off;
22750Sstevel@tonic-gate 	int tag_index;
22760Sstevel@tonic-gate 
22770Sstevel@tonic-gate 	if (offset != -1) {
22780Sstevel@tonic-gate 		/*
22790Sstevel@tonic-gate 		 * Parity error in D$ or P$ data array.
22800Sstevel@tonic-gate 		 *
22810Sstevel@tonic-gate 		 * First check to see whether the parity error is in D$ or P$
22820Sstevel@tonic-gate 		 * since P$ data parity errors are reported in Panther using
22830Sstevel@tonic-gate 		 * the same trap.
22840Sstevel@tonic-gate 		 */
22850Sstevel@tonic-gate 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
22860Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
22870Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
22880Sstevel@tonic-gate 			    CH_PCIDX_TO_WAY(tag_index);
22890Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
22900Sstevel@tonic-gate 			    PC_LOGFLAG_MAGIC;
22910Sstevel@tonic-gate 		} else {
22920Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
22930Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
22940Sstevel@tonic-gate 			    CH_DCIDX_TO_WAY(tag_index);
22950Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
22960Sstevel@tonic-gate 			    DC_LOGFLAG_MAGIC;
22970Sstevel@tonic-gate 		}
22980Sstevel@tonic-gate 	} else if (way != -1) {
22990Sstevel@tonic-gate 		/*
23000Sstevel@tonic-gate 		 * Parity error in D$ tag.
23010Sstevel@tonic-gate 		 */
23020Sstevel@tonic-gate 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
23030Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
23040Sstevel@tonic-gate 		    CH_DCIDX_TO_WAY(tag_index);
23050Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
23060Sstevel@tonic-gate 		    DC_LOGFLAG_MAGIC;
23070Sstevel@tonic-gate 	}
23080Sstevel@tonic-gate }
23090Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
23100Sstevel@tonic-gate 
23110Sstevel@tonic-gate /*
23120Sstevel@tonic-gate  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
23130Sstevel@tonic-gate  * post-process CPU events that are dequeued.  As such, it can be invoked
23140Sstevel@tonic-gate  * from softint context, from AST processing in the trap() flow, or from the
23150Sstevel@tonic-gate  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
23160Sstevel@tonic-gate  * Historically this entry point was used to log the actual cmn_err(9F) text;
23170Sstevel@tonic-gate  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
23180Sstevel@tonic-gate  * With FMA this function now also returns a flag which indicates to the
23190Sstevel@tonic-gate  * caller whether the ereport should be posted (1) or suppressed (0).
23200Sstevel@tonic-gate  */
23210Sstevel@tonic-gate static int
23220Sstevel@tonic-gate cpu_async_log_err(void *flt, errorq_elem_t *eqep)
23230Sstevel@tonic-gate {
23240Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
23250Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)flt;
2326917Selowe 	uint64_t errors;
23270Sstevel@tonic-gate 
23280Sstevel@tonic-gate 	switch (ch_flt->flt_type) {
23290Sstevel@tonic-gate 	case CPU_INV_AFSR:
23300Sstevel@tonic-gate 		/*
23310Sstevel@tonic-gate 		 * If it is a disrupting trap and the AFSR is zero, then
23320Sstevel@tonic-gate 		 * the event has probably already been noted. Do not post
23330Sstevel@tonic-gate 		 * an ereport.
23340Sstevel@tonic-gate 		 */
23350Sstevel@tonic-gate 		if ((aflt->flt_status & ECC_C_TRAP) &&
23360Sstevel@tonic-gate 		    (!(aflt->flt_stat & C_AFSR_MASK)))
23370Sstevel@tonic-gate 			return (0);
23380Sstevel@tonic-gate 		else
23390Sstevel@tonic-gate 			return (1);
23400Sstevel@tonic-gate 	case CPU_TO:
23410Sstevel@tonic-gate 	case CPU_BERR:
23420Sstevel@tonic-gate 	case CPU_FATAL:
23430Sstevel@tonic-gate 	case CPU_FPUERR:
23440Sstevel@tonic-gate 		return (1);
23450Sstevel@tonic-gate 
23460Sstevel@tonic-gate 	case CPU_UE_ECACHE_RETIRE:
23470Sstevel@tonic-gate 		cpu_log_err(aflt);
23480Sstevel@tonic-gate 		cpu_page_retire(ch_flt);
23490Sstevel@tonic-gate 		return (1);
23500Sstevel@tonic-gate 
23510Sstevel@tonic-gate 	/*
23520Sstevel@tonic-gate 	 * Cases where we may want to suppress logging or perform
23530Sstevel@tonic-gate 	 * extended diagnostics.
23540Sstevel@tonic-gate 	 */
23550Sstevel@tonic-gate 	case CPU_CE:
23560Sstevel@tonic-gate 	case CPU_EMC:
23570Sstevel@tonic-gate 		/*
23580Sstevel@tonic-gate 		 * We want to skip logging and further classification
23590Sstevel@tonic-gate 		 * only if ALL the following conditions are true:
23600Sstevel@tonic-gate 		 *
23610Sstevel@tonic-gate 		 *	1. There is only one error
23620Sstevel@tonic-gate 		 *	2. That error is a correctable memory error
23630Sstevel@tonic-gate 		 *	3. The error is caused by the memory scrubber (in
23640Sstevel@tonic-gate 		 *	   which case the error will have occurred under
23650Sstevel@tonic-gate 		 *	   on_trap protection)
23660Sstevel@tonic-gate 		 *	4. The error is on a retired page
23670Sstevel@tonic-gate 		 *
23680Sstevel@tonic-gate 		 * Note: AFLT_PROT_EC is used in places other than the memory
23690Sstevel@tonic-gate 		 * scrubber.  However, none of those errors should occur
23700Sstevel@tonic-gate 		 * on a retired page.
23710Sstevel@tonic-gate 		 */
23720Sstevel@tonic-gate 		if ((ch_flt->afsr_errs &
23730Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
23740Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
23750Sstevel@tonic-gate 
2376917Selowe 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
23770Sstevel@tonic-gate 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
23780Sstevel@tonic-gate 					/*
23790Sstevel@tonic-gate 					 * Since we're skipping logging,
23800Sstevel@tonic-gate 					 * schedule the re-enabling of CEEN.
23810Sstevel@tonic-gate 					 */
23820Sstevel@tonic-gate 					(void) timeout(
23830Sstevel@tonic-gate 					    cpu_delayed_check_ce_errors,
2384946Smathue 					    (void *)(uintptr_t)aflt->flt_inst,
2385946Smathue 					    drv_usectohz((clock_t)
2386946Smathue 					    cpu_ceen_delay_secs * MICROSEC));
23870Sstevel@tonic-gate 				}
23880Sstevel@tonic-gate 				return (0);
23890Sstevel@tonic-gate 			}
23900Sstevel@tonic-gate 		}
23910Sstevel@tonic-gate 
23920Sstevel@tonic-gate 		/*
23930Sstevel@tonic-gate 		 * Perform/schedule further classification actions, but
23940Sstevel@tonic-gate 		 * only if the page is healthy (we don't want bad
23950Sstevel@tonic-gate 		 * pages inducing too much diagnostic activity).  If we could
23960Sstevel@tonic-gate 		 * not find a page pointer then we also skip this.  If
23970Sstevel@tonic-gate 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
23980Sstevel@tonic-gate 		 * to copy and recirculate the event (for further diagnostics)
23990Sstevel@tonic-gate 		 * and we should not proceed to log it here.
24000Sstevel@tonic-gate 		 *
24010Sstevel@tonic-gate 		 * This must be the last step here before the cpu_log_err()
24020Sstevel@tonic-gate 		 * below - if an event recirculates, cpu_ce_log_err() will
24030Sstevel@tonic-gate 		 * not call the current function again; it performs the
24040Sstevel@tonic-gate 		 * cpu_log_err() we avoid below, then calls cpu_ereport_post().
24050Sstevel@tonic-gate 		 *
24060Sstevel@tonic-gate 		 * Note: Check cpu_impl_async_log_err if changing this
24070Sstevel@tonic-gate 		 */
2408917Selowe 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2409917Selowe 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2410917Selowe 			    CE_XDIAG_SKIP_NOPP);
2411917Selowe 		} else {
2412917Selowe 			if (errors != PR_OK) {
24130Sstevel@tonic-gate 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
24140Sstevel@tonic-gate 				    CE_XDIAG_SKIP_PAGEDET);
24150Sstevel@tonic-gate 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
24160Sstevel@tonic-gate 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
24170Sstevel@tonic-gate 				return (0);
24180Sstevel@tonic-gate 			}
24190Sstevel@tonic-gate 		}
24200Sstevel@tonic-gate 		/*FALLTHRU*/
24210Sstevel@tonic-gate 
24220Sstevel@tonic-gate 	/*
24230Sstevel@tonic-gate 	 * Cases where we just want to report the error and continue.
24240Sstevel@tonic-gate 	 */
24250Sstevel@tonic-gate 	case CPU_CE_ECACHE:
24260Sstevel@tonic-gate 	case CPU_UE_ECACHE:
24270Sstevel@tonic-gate 	case CPU_IV:
24280Sstevel@tonic-gate 	case CPU_ORPH:
24290Sstevel@tonic-gate 		cpu_log_err(aflt);
24300Sstevel@tonic-gate 		return (1);
24310Sstevel@tonic-gate 
24320Sstevel@tonic-gate 	/*
24330Sstevel@tonic-gate 	 * Cases where we want to fall through to handle panicking.
24340Sstevel@tonic-gate 	 */
24350Sstevel@tonic-gate 	case CPU_UE:
24360Sstevel@tonic-gate 		/*
24370Sstevel@tonic-gate 		 * We want to skip logging in the same conditions as the
24380Sstevel@tonic-gate 		 * CE case.  In addition, we want to make sure we're not
24390Sstevel@tonic-gate 		 * panicking.
24400Sstevel@tonic-gate 		 */
24410Sstevel@tonic-gate 		if (!panicstr && (ch_flt->afsr_errs &
24420Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
24430Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
2444917Selowe 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
24450Sstevel@tonic-gate 				/* Zero the address to clear the error */
24460Sstevel@tonic-gate 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
24470Sstevel@tonic-gate 				return (0);
24480Sstevel@tonic-gate 			}
24490Sstevel@tonic-gate 		}
24500Sstevel@tonic-gate 		cpu_log_err(aflt);
24510Sstevel@tonic-gate 		break;
24520Sstevel@tonic-gate 
24530Sstevel@tonic-gate 	default:
24540Sstevel@tonic-gate 		/*
24550Sstevel@tonic-gate 		 * If the us3_common.c code doesn't know the flt_type, it may
24560Sstevel@tonic-gate 		 * be an implementation-specific code.  Call into the impldep
24570Sstevel@tonic-gate 		 * backend to find out what to do: if it tells us to continue,
24580Sstevel@tonic-gate 		 * break and handle as if falling through from a UE; if not,
24590Sstevel@tonic-gate 		 * the impldep backend has handled the error and we're done.
24600Sstevel@tonic-gate 		 */
24610Sstevel@tonic-gate 		switch (cpu_impl_async_log_err(flt, eqep)) {
24620Sstevel@tonic-gate 		case CH_ASYNC_LOG_DONE:
24630Sstevel@tonic-gate 			return (1);
24640Sstevel@tonic-gate 		case CH_ASYNC_LOG_RECIRC:
24650Sstevel@tonic-gate 			return (0);
24660Sstevel@tonic-gate 		case CH_ASYNC_LOG_CONTINUE:
24670Sstevel@tonic-gate 			break; /* continue on to handle UE-like error */
24680Sstevel@tonic-gate 		default:
24690Sstevel@tonic-gate 			cmn_err(CE_WARN, "discarding error 0x%p with "
24700Sstevel@tonic-gate 			    "invalid fault type (0x%x)",
24710Sstevel@tonic-gate 			    (void *)aflt, ch_flt->flt_type);
24720Sstevel@tonic-gate 			return (0);
24730Sstevel@tonic-gate 		}
24740Sstevel@tonic-gate 	}
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	/* ... fall through from the UE case */
24770Sstevel@tonic-gate 
24780Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
24790Sstevel@tonic-gate 		if (!panicstr) {
24800Sstevel@tonic-gate 			cpu_page_retire(ch_flt);
24810Sstevel@tonic-gate 		} else {
24820Sstevel@tonic-gate 			/*
24830Sstevel@tonic-gate 			 * Clear UEs on panic so that we don't
24840Sstevel@tonic-gate 			 * get haunted by them during panic or
24850Sstevel@tonic-gate 			 * after reboot
24860Sstevel@tonic-gate 			 */
24870Sstevel@tonic-gate 			cpu_clearphys(aflt);
24880Sstevel@tonic-gate 			(void) clear_errors(NULL);
24890Sstevel@tonic-gate 		}
24900Sstevel@tonic-gate 	}
24910Sstevel@tonic-gate 
24920Sstevel@tonic-gate 	return (1);
24930Sstevel@tonic-gate }
24940Sstevel@tonic-gate 
24950Sstevel@tonic-gate /*
24960Sstevel@tonic-gate  * Retire the bad page that may contain the flushed error.
24970Sstevel@tonic-gate  */
24980Sstevel@tonic-gate void
24990Sstevel@tonic-gate cpu_page_retire(ch_async_flt_t *ch_flt)
25000Sstevel@tonic-gate {
25010Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2502917Selowe 	(void) page_retire(aflt->flt_addr, PR_UE);
25030Sstevel@tonic-gate }
25040Sstevel@tonic-gate 
25050Sstevel@tonic-gate /*
25060Sstevel@tonic-gate  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
25070Sstevel@tonic-gate  * generic event post-processing for correctable and uncorrectable memory,
25080Sstevel@tonic-gate  * E$, and MTag errors.  Historically this entry point was used to log bits of
25090Sstevel@tonic-gate  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
25100Sstevel@tonic-gate  * converted into an ereport.  In addition, it transmits the error to any
25110Sstevel@tonic-gate  * platform-specific service-processor FRU logging routines, if available.
25120Sstevel@tonic-gate  */
25130Sstevel@tonic-gate void
25140Sstevel@tonic-gate cpu_log_err(struct async_flt *aflt)
25150Sstevel@tonic-gate {
25160Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
25170Sstevel@tonic-gate 	int len = 0;
25180Sstevel@tonic-gate 	int synd_status, synd_code, afar_status;
25190Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
25200Sstevel@tonic-gate 
25210Sstevel@tonic-gate 	/*
25220Sstevel@tonic-gate 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
25230Sstevel@tonic-gate 	 * For Panther, L2$ is not external, so we don't want to
25240Sstevel@tonic-gate 	 * generate an E$ unum for those errors.
25250Sstevel@tonic-gate 	 */
25260Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
25270Sstevel@tonic-gate 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
25280Sstevel@tonic-gate 			aflt->flt_status |= ECC_ECACHE;
25290Sstevel@tonic-gate 	} else {
25300Sstevel@tonic-gate 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
25310Sstevel@tonic-gate 			aflt->flt_status |= ECC_ECACHE;
25320Sstevel@tonic-gate 	}
25330Sstevel@tonic-gate 
25340Sstevel@tonic-gate 	/*
25350Sstevel@tonic-gate 	 * Determine syndrome status.
25360Sstevel@tonic-gate 	 */
25370Sstevel@tonic-gate 	synd_status = afsr_to_synd_status(aflt->flt_inst,
25380Sstevel@tonic-gate 	    ch_flt->afsr_errs, ch_flt->flt_bit);
25390Sstevel@tonic-gate 
25400Sstevel@tonic-gate 	/*
25410Sstevel@tonic-gate 	 * Determine afar status.
25420Sstevel@tonic-gate 	 */
25430Sstevel@tonic-gate 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
25440Sstevel@tonic-gate 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
25450Sstevel@tonic-gate 		    ch_flt->flt_bit);
25460Sstevel@tonic-gate 	else
25470Sstevel@tonic-gate 		afar_status = AFLT_STAT_INVALID;
25480Sstevel@tonic-gate 
25490Sstevel@tonic-gate 	/*
25500Sstevel@tonic-gate 	 * If afar status is not invalid do a unum lookup.
25510Sstevel@tonic-gate 	 */
25520Sstevel@tonic-gate 	if (afar_status != AFLT_STAT_INVALID) {
25530Sstevel@tonic-gate 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
25540Sstevel@tonic-gate 		    UNUM_NAMLEN, &len);
25550Sstevel@tonic-gate 	} else {
25560Sstevel@tonic-gate 		unum[0] = '\0';
25570Sstevel@tonic-gate 	}
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status,
25600Sstevel@tonic-gate 	    aflt->flt_synd, ch_flt->flt_bit);
25610Sstevel@tonic-gate 
25620Sstevel@tonic-gate 	/*
25630Sstevel@tonic-gate 	 * Do not send the fruid message (plat_ecc_error_data_t)
25640Sstevel@tonic-gate 	 * to the SC if it can handle the enhanced error information
25650Sstevel@tonic-gate 	 * (plat_ecc_error2_data_t) or when the tunable
25660Sstevel@tonic-gate 	 * ecc_log_fruid_enable is set to 0.
25670Sstevel@tonic-gate 	 */
25680Sstevel@tonic-gate 
25690Sstevel@tonic-gate 	if (&plat_ecc_capability_sc_get &&
25700Sstevel@tonic-gate 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
25710Sstevel@tonic-gate 		if (&plat_log_fruid_error)
25720Sstevel@tonic-gate 			plat_log_fruid_error(synd_code, aflt, unum,
25730Sstevel@tonic-gate 			    ch_flt->flt_bit);
25740Sstevel@tonic-gate 	}
25750Sstevel@tonic-gate 
25760Sstevel@tonic-gate 	if (aflt->flt_func != NULL)
25770Sstevel@tonic-gate 		aflt->flt_func(aflt, unum);
25780Sstevel@tonic-gate 
25790Sstevel@tonic-gate 	if (afar_status != AFLT_STAT_INVALID)
25800Sstevel@tonic-gate 		cpu_log_diag_info(ch_flt);
25810Sstevel@tonic-gate 
25820Sstevel@tonic-gate 	/*
25830Sstevel@tonic-gate 	 * If we have a CEEN error, we do not re-enable CEEN until after
25840Sstevel@tonic-gate 	 * we exit the trap handler. Otherwise, another error may
25850Sstevel@tonic-gate 	 * occur causing the handler to be entered recursively.
25860Sstevel@tonic-gate 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
25870Sstevel@tonic-gate 	 * to try and ensure that the CPU makes progress in the face
25880Sstevel@tonic-gate 	 * of a CE storm.
25890Sstevel@tonic-gate 	 */
25900Sstevel@tonic-gate 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
25910Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors,
2592946Smathue 		    (void *)(uintptr_t)aflt->flt_inst,
25930Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
25940Sstevel@tonic-gate 	}
25950Sstevel@tonic-gate }
25960Sstevel@tonic-gate 
25970Sstevel@tonic-gate /*
25980Sstevel@tonic-gate  * Invoked by error_init() early in startup and therefore before
25990Sstevel@tonic-gate  * startup_errorq() is called to drain any error Q -
26000Sstevel@tonic-gate  *
26010Sstevel@tonic-gate  * startup()
26020Sstevel@tonic-gate  *   startup_end()
26030Sstevel@tonic-gate  *     error_init()
26040Sstevel@tonic-gate  *       cpu_error_init()
26050Sstevel@tonic-gate  * errorq_init()
26060Sstevel@tonic-gate  *   errorq_drain()
26070Sstevel@tonic-gate  * start_other_cpus()
26080Sstevel@tonic-gate  *
26090Sstevel@tonic-gate  * The purpose of this routine is to create error-related taskqs.  Taskqs
26100Sstevel@tonic-gate  * are used for this purpose because cpu_lock can't be grabbed from interrupt
26110Sstevel@tonic-gate  * context.
26120Sstevel@tonic-gate  */
26130Sstevel@tonic-gate void
26140Sstevel@tonic-gate cpu_error_init(int items)
26150Sstevel@tonic-gate {
26160Sstevel@tonic-gate 	/*
26170Sstevel@tonic-gate 	 * Create taskq(s) used to re-enable CEEN
26180Sstevel@tonic-gate 	 */
26190Sstevel@tonic-gate 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
26200Sstevel@tonic-gate 	    items, items, TASKQ_PREPOPULATE);
26210Sstevel@tonic-gate }
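26210Sstevel@tonic-gate 
26210Sstevel@tonic-gate /*
26210Sstevel@tonic-gate  * Sketch (illustrative only) of how work is expected to reach the taskq
26210Sstevel@tonic-gate  * created above; the handler name is an assumption based on the
26210Sstevel@tonic-gate  * cpu_delayed_check_ce_errors() timeout set up in cpu_async_log_err():
26210Sstevel@tonic-gate  *
26210Sstevel@tonic-gate  *	(void) taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors,
26210Sstevel@tonic-gate  *	    (void *)(uintptr_t)cpu_id, TQ_NOSLEEP);
26210Sstevel@tonic-gate  */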
26220Sstevel@tonic-gate 
26230Sstevel@tonic-gate void
26240Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
26250Sstevel@tonic-gate {
26260Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
26270Sstevel@tonic-gate 	int len;
26280Sstevel@tonic-gate 
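26280Sstevel@tonic-gate 	/*
26280Sstevel@tonic-gate 	 * The RECIRC_* classes below mark events that were duplicated and
26280Sstevel@tonic-gate 	 * recirculated by ce_scrub_xdiag_recirc() for further testing; by
26280Sstevel@tonic-gate 	 * the time such an event drains again its extended diagnostic data
26280Sstevel@tonic-gate 	 * is complete, so it is logged and posted directly.
26280Sstevel@tonic-gate 	 */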
26290Sstevel@tonic-gate 	switch (aflt->flt_class) {
26300Sstevel@tonic-gate 	case CPU_FAULT:
26310Sstevel@tonic-gate 		cpu_ereport_init(aflt);
26320Sstevel@tonic-gate 		if (cpu_async_log_err(aflt, eqep))
26330Sstevel@tonic-gate 			cpu_ereport_post(aflt);
26340Sstevel@tonic-gate 		break;
26350Sstevel@tonic-gate 
26360Sstevel@tonic-gate 	case BUS_FAULT:
26370Sstevel@tonic-gate 		if (aflt->flt_func != NULL) {
26380Sstevel@tonic-gate 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
26390Sstevel@tonic-gate 			    unum, UNUM_NAMLEN, &len);
26400Sstevel@tonic-gate 			aflt->flt_func(aflt, unum);
26410Sstevel@tonic-gate 		}
26420Sstevel@tonic-gate 		break;
26430Sstevel@tonic-gate 
26440Sstevel@tonic-gate 	case RECIRC_CPU_FAULT:
26450Sstevel@tonic-gate 		aflt->flt_class = CPU_FAULT;
26460Sstevel@tonic-gate 		cpu_log_err(aflt);
26470Sstevel@tonic-gate 		cpu_ereport_post(aflt);
26480Sstevel@tonic-gate 		break;
26490Sstevel@tonic-gate 
26500Sstevel@tonic-gate 	case RECIRC_BUS_FAULT:
26510Sstevel@tonic-gate 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
26520Sstevel@tonic-gate 		/*FALLTHRU*/
26530Sstevel@tonic-gate 	default:
26540Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
26550Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
26560Sstevel@tonic-gate 		return;
26570Sstevel@tonic-gate 	}
26580Sstevel@tonic-gate }
26590Sstevel@tonic-gate 
26600Sstevel@tonic-gate /*
26610Sstevel@tonic-gate  * Scrub and classify a CE.  This function must not modify the
26620Sstevel@tonic-gate  * fault structure passed to it but instead should return the classification
26630Sstevel@tonic-gate  * information.
26640Sstevel@tonic-gate  */
26660Sstevel@tonic-gate static uchar_t
26670Sstevel@tonic-gate cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
26680Sstevel@tonic-gate {
26690Sstevel@tonic-gate 	uchar_t disp = CE_XDIAG_EXTALG;
26700Sstevel@tonic-gate 	on_trap_data_t otd;
26710Sstevel@tonic-gate 	uint64_t orig_err;
26720Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
26730Sstevel@tonic-gate 
26740Sstevel@tonic-gate 	/*
26750Sstevel@tonic-gate 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
26760Sstevel@tonic-gate 	 * this, but our other callers have not.  Disable preemption to
26770Sstevel@tonic-gate 	 * avoid CPU migration so that we restore CEEN on the correct
26780Sstevel@tonic-gate 	 * cpu later.
26790Sstevel@tonic-gate 	 *
26800Sstevel@tonic-gate 	 * CEEN is cleared so that further CEs that our instruction and
26810Sstevel@tonic-gate 	 * data footprint induce do not cause us either to creep down
26820Sstevel@tonic-gate 	 * the kernel stack to the point of overflow, or to do so much CE
26830Sstevel@tonic-gate 	 * notification as to make little real forward progress.
26840Sstevel@tonic-gate 	 *
26850Sstevel@tonic-gate 	 * NCEEN must not be cleared.  However it is possible that
26860Sstevel@tonic-gate 	 * our accesses to the flt_addr may provoke a bus error or timeout
26870Sstevel@tonic-gate 	 * if the offending address has just been unconfigured as part of
26880Sstevel@tonic-gate 	 * a DR action.  So we must operate under on_trap protection.
26890Sstevel@tonic-gate 	 */
26900Sstevel@tonic-gate 	kpreempt_disable();
26910Sstevel@tonic-gate 	orig_err = get_error_enable();
26920Sstevel@tonic-gate 	if (orig_err & EN_REG_CEEN)
26930Sstevel@tonic-gate 		set_error_enable(orig_err & ~EN_REG_CEEN);
26940Sstevel@tonic-gate 
26950Sstevel@tonic-gate 	/*
26960Sstevel@tonic-gate 	 * Our classification algorithm includes the line state before
26970Sstevel@tonic-gate 	 * the scrub; we'd like this captured after the detection and
26980Sstevel@tonic-gate 	 * before the algorithm below - the earlier the better.
26990Sstevel@tonic-gate 	 *
27000Sstevel@tonic-gate 	 * If we've come from a cpu CE trap then this info already exists
27010Sstevel@tonic-gate 	 * in the cpu logout area.
27020Sstevel@tonic-gate 	 *
27030Sstevel@tonic-gate 	 * For a CE detected by memscrub for which there was no trap
27040Sstevel@tonic-gate 	 * (running with CEEN off) cpu_log_and_clear_ce has called
27050Sstevel@tonic-gate 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
27060Sstevel@tonic-gate 	 * marked the fault structure as incomplete as a flag to later
27070Sstevel@tonic-gate 	 * logging code.
27080Sstevel@tonic-gate 	 *
27090Sstevel@tonic-gate 	 * If called directly from an IO detected CE there has been
27100Sstevel@tonic-gate 	 * no line data capture.  In this case we logout to the cpu logout
27110Sstevel@tonic-gate 	 * area - that's appropriate since it's the cpu cache data we need
27120Sstevel@tonic-gate 	 * for classification.  We thus borrow the cpu logout area for a
27130Sstevel@tonic-gate 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
27140Sstevel@tonic-gate 	 * during this time (we will invalidate it again below).
27150Sstevel@tonic-gate 	 *
27160Sstevel@tonic-gate 	 * If called from the partner check xcall handler then this cpu
27170Sstevel@tonic-gate 	 * (the partner) has not necessarily experienced a CE at this
27180Sstevel@tonic-gate 	 * address.  But we want to capture line state before its scrub
27190Sstevel@tonic-gate 	 * attempt since we use that in our classification.
27200Sstevel@tonic-gate 	 */
27210Sstevel@tonic-gate 	if (logout_tried == B_FALSE) {
27220Sstevel@tonic-gate 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
27230Sstevel@tonic-gate 			disp |= CE_XDIAG_NOLOGOUT;
27240Sstevel@tonic-gate 	}
27250Sstevel@tonic-gate 
27260Sstevel@tonic-gate 	/*
27270Sstevel@tonic-gate 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
27280Sstevel@tonic-gate 	 * no longer be valid (if DR'd since the initial event) so we
27290Sstevel@tonic-gate 	 * perform this scrub under on_trap protection.  If this access is
27300Sstevel@tonic-gate 	 * ok then further accesses below will also be ok - DR cannot
27310Sstevel@tonic-gate 	 * proceed while this thread is active (preemption is disabled);
27320Sstevel@tonic-gate 	 * to be safe we'll nonetheless use on_trap again below.
27330Sstevel@tonic-gate 	 */
27340Sstevel@tonic-gate 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
27350Sstevel@tonic-gate 		cpu_scrubphys(ecc);
27360Sstevel@tonic-gate 	} else {
27370Sstevel@tonic-gate 		no_trap();
27380Sstevel@tonic-gate 		if (orig_err & EN_REG_CEEN)
27390Sstevel@tonic-gate 			set_error_enable(orig_err);
27400Sstevel@tonic-gate 		kpreempt_enable();
27410Sstevel@tonic-gate 		return (disp);
27420Sstevel@tonic-gate 	}
27430Sstevel@tonic-gate 	no_trap();
27440Sstevel@tonic-gate 
27450Sstevel@tonic-gate 	/*
27460Sstevel@tonic-gate 	 * Did the casx read done by the scrub log a CE that matches the AFAR?
27470Sstevel@tonic-gate 	 * Note that it's quite possible that the read sourced the data from
27480Sstevel@tonic-gate 	 * another cpu.
27490Sstevel@tonic-gate 	 */
27500Sstevel@tonic-gate 	if (clear_ecc(ecc))
27510Sstevel@tonic-gate 		disp |= CE_XDIAG_CE1;
27520Sstevel@tonic-gate 
27530Sstevel@tonic-gate 	/*
27540Sstevel@tonic-gate 	 * Read the data again.  This time the read is very likely to
27550Sstevel@tonic-gate 	 * come from memory since the scrub induced a writeback to memory.
27560Sstevel@tonic-gate 	 */
27570Sstevel@tonic-gate 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
27580Sstevel@tonic-gate 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
27590Sstevel@tonic-gate 	} else {
27600Sstevel@tonic-gate 		no_trap();
27610Sstevel@tonic-gate 		if (orig_err & EN_REG_CEEN)
27620Sstevel@tonic-gate 			set_error_enable(orig_err);
27630Sstevel@tonic-gate 		kpreempt_enable();
27640Sstevel@tonic-gate 		return (disp);
27650Sstevel@tonic-gate 	}
27660Sstevel@tonic-gate 	no_trap();
27670Sstevel@tonic-gate 
27680Sstevel@tonic-gate 	/* Did that read induce a CE that matches the AFAR? */
27690Sstevel@tonic-gate 	if (clear_ecc(ecc))
27700Sstevel@tonic-gate 		disp |= CE_XDIAG_CE2;
27710Sstevel@tonic-gate 
27720Sstevel@tonic-gate 	/*
27730Sstevel@tonic-gate 	 * Look at the logout information and record whether we found the
27740Sstevel@tonic-gate 	 * line in l2/l3 cache.  For Panther we are interested in whether
27750Sstevel@tonic-gate 	 * we found it in either cache (it won't reside in both but
27760Sstevel@tonic-gate 	 * it is possible to read it that way given the moving target).
27770Sstevel@tonic-gate 	 */
27780Sstevel@tonic-gate 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
27790Sstevel@tonic-gate 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
27800Sstevel@tonic-gate 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
27810Sstevel@tonic-gate 		int hit, level;
27820Sstevel@tonic-gate 		int state;
27830Sstevel@tonic-gate 		int totalsize;
27840Sstevel@tonic-gate 		ch_ec_data_t *ecp;
27850Sstevel@tonic-gate 
27860Sstevel@tonic-gate 		/*
27870Sstevel@tonic-gate 		 * If hit is nonzero then a match was found and hit will
27880Sstevel@tonic-gate 		 * be one greater than the index which hit.  For Panther we
27890Sstevel@tonic-gate 		 * also need to pay attention to level to see which of l2$ or
27900Sstevel@tonic-gate 		 * l3$ it hit in.
27910Sstevel@tonic-gate 		 */
27920Sstevel@tonic-gate 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
27930Sstevel@tonic-gate 		    0, &level);
27940Sstevel@tonic-gate 
27950Sstevel@tonic-gate 		if (hit) {
27960Sstevel@tonic-gate 			--hit;
27970Sstevel@tonic-gate 			disp |= CE_XDIAG_AFARMATCH;
27980Sstevel@tonic-gate 
27990Sstevel@tonic-gate 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
28000Sstevel@tonic-gate 				if (level == 2)
28010Sstevel@tonic-gate 					ecp = &clop->clo_data.chd_l2_data[hit];
28020Sstevel@tonic-gate 				else
28030Sstevel@tonic-gate 					ecp = &clop->clo_data.chd_ec_data[hit];
28040Sstevel@tonic-gate 			} else {
28050Sstevel@tonic-gate 				ASSERT(level == 2);
28060Sstevel@tonic-gate 				ecp = &clop->clo_data.chd_ec_data[hit];
28070Sstevel@tonic-gate 			}
28080Sstevel@tonic-gate 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
28090Sstevel@tonic-gate 			state = cpu_ectag_pa_to_subblk_state(totalsize,
28100Sstevel@tonic-gate 			    ecc->flt_addr, ecp->ec_tag);
28110Sstevel@tonic-gate 
28120Sstevel@tonic-gate 			/*
28130Sstevel@tonic-gate 			 * Cheetah variants use different state encodings -
28140Sstevel@tonic-gate 			 * the CH_ECSTATE_* defines vary depending on the
28150Sstevel@tonic-gate 			 * module we're compiled for.  Translate into our
28160Sstevel@tonic-gate 			 * one true version.  Conflate Owner-Shared state
28170Sstevel@tonic-gate 			 * of SSM mode with Owner as victimisation of such
28180Sstevel@tonic-gate 			 * lines may cause a writeback.
28190Sstevel@tonic-gate 			 */
28200Sstevel@tonic-gate 			switch (state) {
28210Sstevel@tonic-gate 			case CH_ECSTATE_MOD:
28220Sstevel@tonic-gate 				disp |= EC_STATE_M;
28230Sstevel@tonic-gate 				break;
28240Sstevel@tonic-gate 
28250Sstevel@tonic-gate 			case CH_ECSTATE_OWN:
28260Sstevel@tonic-gate 			case CH_ECSTATE_OWS:
28270Sstevel@tonic-gate 				disp |= EC_STATE_O;
28280Sstevel@tonic-gate 				break;
28290Sstevel@tonic-gate 
28300Sstevel@tonic-gate 			case CH_ECSTATE_EXL:
28310Sstevel@tonic-gate 				disp |= EC_STATE_E;
28320Sstevel@tonic-gate 				break;
28330Sstevel@tonic-gate 
28340Sstevel@tonic-gate 			case CH_ECSTATE_SHR:
28350Sstevel@tonic-gate 				disp |= EC_STATE_S;
28360Sstevel@tonic-gate 				break;
28370Sstevel@tonic-gate 
28380Sstevel@tonic-gate 			default:
28390Sstevel@tonic-gate 				disp |= EC_STATE_I;
28400Sstevel@tonic-gate 				break;
28410Sstevel@tonic-gate 			}
28420Sstevel@tonic-gate 		}
28430Sstevel@tonic-gate 
28440Sstevel@tonic-gate 		/*
28450Sstevel@tonic-gate 		 * If we initiated the delayed logout then we are responsible
28460Sstevel@tonic-gate 		 * for invalidating the logout area.
28470Sstevel@tonic-gate 		 */
28480Sstevel@tonic-gate 		if (logout_tried == B_FALSE) {
28490Sstevel@tonic-gate 			bzero(clop, sizeof (ch_cpu_logout_t));
28500Sstevel@tonic-gate 			clop->clo_data.chd_afar = LOGOUT_INVALID;
28510Sstevel@tonic-gate 		}
28520Sstevel@tonic-gate 	}
28530Sstevel@tonic-gate 
28540Sstevel@tonic-gate 	/*
28550Sstevel@tonic-gate 	 * Re-enable CEEN if we turned it off.
28560Sstevel@tonic-gate 	 */
28570Sstevel@tonic-gate 	if (orig_err & EN_REG_CEEN)
28580Sstevel@tonic-gate 		set_error_enable(orig_err);
28590Sstevel@tonic-gate 	kpreempt_enable();
28600Sstevel@tonic-gate 
28610Sstevel@tonic-gate 	return (disp);
28620Sstevel@tonic-gate }
28630Sstevel@tonic-gate 
28640Sstevel@tonic-gate /*
28650Sstevel@tonic-gate  * Scrub a correctable memory error and collect data for classification
28660Sstevel@tonic-gate  * of CE type.  This function is called in the detection path, ie tl0 handling
28670Sstevel@tonic-gate  * of CE type.  This function is called in the detection path, i.e. TL=0
28680Sstevel@tonic-gate  * handling of a correctable error trap (cpus) or interrupt (IO) at high PIL.
28690Sstevel@tonic-gate void
28700Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
28710Sstevel@tonic-gate {
28720Sstevel@tonic-gate 	/*
28730Sstevel@tonic-gate 	 * Cheetah CE classification does not set any bits in flt_status.
28740Sstevel@tonic-gate 	 * Instead we will record classification datapoints in flt_disp.
28750Sstevel@tonic-gate 	 */
28760Sstevel@tonic-gate 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
28770Sstevel@tonic-gate 
28780Sstevel@tonic-gate 	/*
28790Sstevel@tonic-gate 	 * For errors detected by IO, set flt_stat so that clear_ecc() can
28800Sstevel@tonic-gate 	 * determine whether the error is persistent, sticky or intermittent.
28810Sstevel@tonic-gate 	 */
28820Sstevel@tonic-gate 	if (ecc->flt_status & ECC_IOBUS)
28830Sstevel@tonic-gate 		ecc->flt_stat = C_AFSR_MEMORY;
28840Sstevel@tonic-gate 
28850Sstevel@tonic-gate 	/*
28860Sstevel@tonic-gate 	 * Record information from this first part of the algorithm in
28870Sstevel@tonic-gate 	 * flt_disp.
28880Sstevel@tonic-gate 	 */
28890Sstevel@tonic-gate 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
28900Sstevel@tonic-gate }
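28900Sstevel@tonic-gate 
28900Sstevel@tonic-gate /*
28900Sstevel@tonic-gate  * For reference, the detector disposition recorded above is consumed by
28900Sstevel@tonic-gate  * ce_scrub_xdiag_recirc(), which feeds the CE_XDIAG_* datapoints (AFAR
28900Sstevel@tonic-gate  * match, line state, CE seen on scrub and on re-read) into the
28900Sstevel@tonic-gate  * ce_disp_table lookup that selects any follow-up action.
28900Sstevel@tonic-gate  */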
28910Sstevel@tonic-gate 
28920Sstevel@tonic-gate /*
28930Sstevel@tonic-gate  * Select a partner to perform a further CE classification check from.
28940Sstevel@tonic-gate  * Must be called with kernel preemption disabled (to stop the cpu list
28950Sstevel@tonic-gate  * from changing).  The detecting cpu we are partnering has cpuid
28960Sstevel@tonic-gate  * aflt->flt_inst; we might not be running on the detecting cpu.
28970Sstevel@tonic-gate  *
28980Sstevel@tonic-gate  * Restrict choice to active cpus in the same cpu partition as ourselves in
28990Sstevel@tonic-gate  * an effort to stop bad cpus in one partition causing other partitions to
29000Sstevel@tonic-gate  * perform excessive diagnostic activity.  Actually, since the errorq drain
29010Sstevel@tonic-gate  * is run from a softint most of the time and that is a global mechanism,
29020Sstevel@tonic-gate  * this isolation is only partial.  Return NULL if we fail to find a
29030Sstevel@tonic-gate  * suitable partner.
29040Sstevel@tonic-gate  *
29050Sstevel@tonic-gate  * We prefer a partner that is in a different latency group to ourselves as
29060Sstevel@tonic-gate  * we will share fewer datapaths.  If such a partner is unavailable then
29070Sstevel@tonic-gate  * choose one in the same lgroup but prefer a different chip and only allow
29080Sstevel@tonic-gate  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
29090Sstevel@tonic-gate  * flags includes PTNR_SELFOK then permit selection of the original detector.
29100Sstevel@tonic-gate  *
29110Sstevel@tonic-gate  * We keep a cache of the last partner selected for a cpu, and we'll try to
29120Sstevel@tonic-gate  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
29130Sstevel@tonic-gate  * have passed since that selection was made.  This provides the benefit
29140Sstevel@tonic-gate  * of the point-of-view of different partners over time but without
29150Sstevel@tonic-gate  * requiring frequent cpu list traversals.
29160Sstevel@tonic-gate  */
29170Sstevel@tonic-gate 
29180Sstevel@tonic-gate #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
29190Sstevel@tonic-gate #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
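29190Sstevel@tonic-gate 
29190Sstevel@tonic-gate /*
29190Sstevel@tonic-gate  * For example, the leaky check below calls
29190Sstevel@tonic-gate  *
29190Sstevel@tonic-gate  *	cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, &ptnrtype);
29190Sstevel@tonic-gate  *
29190Sstevel@tonic-gate  * since a sibling core or even the detector itself is an acceptable
29190Sstevel@tonic-gate  * fallback there, while the stricter partner check passes 0 for flags.
29190Sstevel@tonic-gate  */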
29200Sstevel@tonic-gate 
29210Sstevel@tonic-gate static cpu_t *
29220Sstevel@tonic-gate ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
29230Sstevel@tonic-gate {
29240Sstevel@tonic-gate 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
29250Sstevel@tonic-gate 	hrtime_t lasttime, thistime;
29260Sstevel@tonic-gate 
29270Sstevel@tonic-gate 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
29280Sstevel@tonic-gate 
29290Sstevel@tonic-gate 	dtcr = cpu[aflt->flt_inst];
29300Sstevel@tonic-gate 
29310Sstevel@tonic-gate 	/*
29320Sstevel@tonic-gate 	 * Short-circuit for the following cases:
29330Sstevel@tonic-gate 	 *	. the dtcr is not flagged active
29340Sstevel@tonic-gate 	 *	. there is just one cpu present
29350Sstevel@tonic-gate 	 *	. the detector has disappeared
29360Sstevel@tonic-gate 	 *	. we were given a bad flt_inst cpuid; this should not happen
29370Sstevel@tonic-gate 	 *	  (e.g. PCI code now fills flt_inst) but if it does it is no
29380Sstevel@tonic-gate 	 *	  reason to panic.
29390Sstevel@tonic-gate 	 *	. there is just one cpu left online in the cpu partition
29400Sstevel@tonic-gate 	 *
29410Sstevel@tonic-gate 	 * If we return NULL after this point then we do not update the
29420Sstevel@tonic-gate 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
29430Sstevel@tonic-gate 	 * again next time; this is the case where the only other cpu online
29440Sstevel@tonic-gate 	 * in the detector's partition is on the same chip as the detector
29450Sstevel@tonic-gate 	 * and since CEEN re-enable is throttled even that case should not
29460Sstevel@tonic-gate 	 * hurt performance.
29470Sstevel@tonic-gate 	 */
29480Sstevel@tonic-gate 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
29490Sstevel@tonic-gate 		return (NULL);
29500Sstevel@tonic-gate 	}
29510Sstevel@tonic-gate 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
29520Sstevel@tonic-gate 		if (flags & PTNR_SELFOK) {
29530Sstevel@tonic-gate 			*typep = CE_XDIAG_PTNR_SELF;
29540Sstevel@tonic-gate 			return (dtcr);
29550Sstevel@tonic-gate 		} else {
29560Sstevel@tonic-gate 			return (NULL);
29570Sstevel@tonic-gate 		}
29580Sstevel@tonic-gate 	}
29590Sstevel@tonic-gate 
29600Sstevel@tonic-gate 	thistime = gethrtime();
29610Sstevel@tonic-gate 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
29620Sstevel@tonic-gate 
29630Sstevel@tonic-gate 	/*
29640Sstevel@tonic-gate 	 * Select a starting point.
29650Sstevel@tonic-gate 	 */
29660Sstevel@tonic-gate 	if (!lasttime) {
29670Sstevel@tonic-gate 		/*
29680Sstevel@tonic-gate 		 * We've never selected a partner for this detector before.
29690Sstevel@tonic-gate 		 * Start the scan at the next online cpu in the same cpu
29700Sstevel@tonic-gate 		 * partition.
29710Sstevel@tonic-gate 		 */
29720Sstevel@tonic-gate 		sp = dtcr->cpu_next_part;
29730Sstevel@tonic-gate 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
29740Sstevel@tonic-gate 		/*
29750Sstevel@tonic-gate 		 * Our last selection has not aged yet.  If this partner:
29760Sstevel@tonic-gate 		 *	. is still a valid cpu,
29770Sstevel@tonic-gate 		 *	. is still in the same partition as the detector
29780Sstevel@tonic-gate 		 *	. is still marked active
29790Sstevel@tonic-gate 		 *	. satisfies the 'flags' argument criteria
29800Sstevel@tonic-gate 		 * then select it again without updating the timestamp.
29810Sstevel@tonic-gate 		 */
29820Sstevel@tonic-gate 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
29830Sstevel@tonic-gate 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
29840Sstevel@tonic-gate 		    !cpu_flagged_active(sp->cpu_flags) ||
29850Sstevel@tonic-gate 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
29860Sstevel@tonic-gate 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
29870Sstevel@tonic-gate 		    !(flags & PTNR_SIBLINGOK))) {
29880Sstevel@tonic-gate 			sp = dtcr->cpu_next_part;
29890Sstevel@tonic-gate 		} else {
29900Sstevel@tonic-gate 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
29910Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_REMOTE;
29920Sstevel@tonic-gate 			} else if (sp == dtcr) {
29930Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_SELF;
29940Sstevel@tonic-gate 			} else if (sp->cpu_chip->chip_id ==
29950Sstevel@tonic-gate 			    dtcr->cpu_chip->chip_id) {
29960Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_SIBLING;
29970Sstevel@tonic-gate 			} else {
29980Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_LOCAL;
29990Sstevel@tonic-gate 			}
30000Sstevel@tonic-gate 			return (sp);
30010Sstevel@tonic-gate 		}
30020Sstevel@tonic-gate 	} else {
30030Sstevel@tonic-gate 		/*
30040Sstevel@tonic-gate 		 * Our last selection has aged.  If it is nonetheless still a
30050Sstevel@tonic-gate 		 * valid cpu then start the scan at the next cpu in the
30060Sstevel@tonic-gate 		 * partition after our last partner.  If the last selection
30070Sstevel@tonic-gate 		 * is no longer a valid cpu then go with our default.  In
30080Sstevel@tonic-gate 		 * this way we slowly cycle through possible partners to
30090Sstevel@tonic-gate 		 * obtain multiple viewpoints over time.
30100Sstevel@tonic-gate 		 */
30110Sstevel@tonic-gate 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
30120Sstevel@tonic-gate 		if (sp == NULL) {
30130Sstevel@tonic-gate 			sp = dtcr->cpu_next_part;
30140Sstevel@tonic-gate 		} else {
30150Sstevel@tonic-gate 			sp = sp->cpu_next_part;		/* may be dtcr */
30160Sstevel@tonic-gate 			if (sp->cpu_part != dtcr->cpu_part)
30170Sstevel@tonic-gate 				sp = dtcr;
30180Sstevel@tonic-gate 		}
30190Sstevel@tonic-gate 	}
30200Sstevel@tonic-gate 
30210Sstevel@tonic-gate 	/*
30220Sstevel@tonic-gate 	 * We have a proposed starting point for our search, but if this
30230Sstevel@tonic-gate 	 * cpu is offline then its cpu_next_part will point to itself
30240Sstevel@tonic-gate 	 * so we can't use that to iterate over cpus in this partition in
30250Sstevel@tonic-gate 	 * the loop below.  We still want to avoid iterating over cpus not
30260Sstevel@tonic-gate 	 * in our partition, so in the case that our starting point is offline
30270Sstevel@tonic-gate 	 * we will repoint it to be the detector itself;  and if the detector
30280Sstevel@tonic-gate 	 * happens to be offline we'll return NULL from the following loop.
30290Sstevel@tonic-gate 	 */
30300Sstevel@tonic-gate 	if (!cpu_flagged_active(sp->cpu_flags)) {
30310Sstevel@tonic-gate 		sp = dtcr;
30320Sstevel@tonic-gate 	}
30330Sstevel@tonic-gate 
30340Sstevel@tonic-gate 	ptnr = sp;
30350Sstevel@tonic-gate 	locptnr = NULL;
30360Sstevel@tonic-gate 	sibptnr = NULL;
30370Sstevel@tonic-gate 	do {
30380Sstevel@tonic-gate 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
30390Sstevel@tonic-gate 			continue;
30400Sstevel@tonic-gate 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
30410Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
30420Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30430Sstevel@tonic-gate 			*typep = CE_XDIAG_PTNR_REMOTE;
30440Sstevel@tonic-gate 			return (ptnr);
30450Sstevel@tonic-gate 		}
30460Sstevel@tonic-gate 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
30470Sstevel@tonic-gate 			if (sibptnr == NULL)
30480Sstevel@tonic-gate 				sibptnr = ptnr;
30490Sstevel@tonic-gate 			continue;
30500Sstevel@tonic-gate 		}
30510Sstevel@tonic-gate 		if (locptnr == NULL)
30520Sstevel@tonic-gate 			locptnr = ptnr;
30530Sstevel@tonic-gate 	} while ((ptnr = ptnr->cpu_next_part) != sp);
30540Sstevel@tonic-gate 
30550Sstevel@tonic-gate 	/*
30560Sstevel@tonic-gate 	 * A foreign partner has already been returned if one was available.
30570Sstevel@tonic-gate 	 *
30580Sstevel@tonic-gate 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
30590Sstevel@tonic-gate 	 * detector, is active, and is not a sibling of the detector.
30600Sstevel@tonic-gate 	 *
30610Sstevel@tonic-gate 	 * If sibptnr is not NULL it is a sibling of the detector, and is
30620Sstevel@tonic-gate 	 * active.
30630Sstevel@tonic-gate 	 *
30640Sstevel@tonic-gate 	 * If we have to resort to using the detector itself we have already
30650Sstevel@tonic-gate 	 * checked that it is active.
30660Sstevel@tonic-gate 	 */
30670Sstevel@tonic-gate 	if (locptnr) {
30680Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
30690Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30700Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_LOCAL;
30710Sstevel@tonic-gate 		return (locptnr);
30720Sstevel@tonic-gate 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
30730Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
30740Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30750Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SIBLING;
30760Sstevel@tonic-gate 		return (sibptnr);
30770Sstevel@tonic-gate 	} else if (flags & PTNR_SELFOK) {
30780Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
30790Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
30800Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SELF;
30810Sstevel@tonic-gate 		return (dtcr);
30820Sstevel@tonic-gate 	}
30830Sstevel@tonic-gate 
30840Sstevel@tonic-gate 	return (NULL);
30850Sstevel@tonic-gate }
30860Sstevel@tonic-gate 
30870Sstevel@tonic-gate /*
30880Sstevel@tonic-gate  * Cross call handler that is requested to run on the designated partner of
30890Sstevel@tonic-gate  * a cpu that experienced a possibly sticky or possibly persistent CE.
30900Sstevel@tonic-gate  */
30910Sstevel@tonic-gate static void
30920Sstevel@tonic-gate ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
30930Sstevel@tonic-gate {
30940Sstevel@tonic-gate 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
30950Sstevel@tonic-gate }
30960Sstevel@tonic-gate 
30970Sstevel@tonic-gate /*
30980Sstevel@tonic-gate  * The associated errorqs are never destroyed so we do not need to deal with
30990Sstevel@tonic-gate  * them disappearing before this timeout fires.  If the affected memory
31000Sstevel@tonic-gate  * has been DR'd out since the original event the scrub algorithm will catch
31010Sstevel@tonic-gate  * any errors and return null disposition info.  If the original detecting
31020Sstevel@tonic-gate  * cpu has been DR'd out then ereport detector info will not be able to
31030Sstevel@tonic-gate  * look up the CPU type; with a small timeout this is unlikely.
31040Sstevel@tonic-gate  */
31050Sstevel@tonic-gate static void
31060Sstevel@tonic-gate ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
31070Sstevel@tonic-gate {
31080Sstevel@tonic-gate 	struct async_flt *aflt = cbarg->lkycb_aflt;
31090Sstevel@tonic-gate 	uchar_t disp;
31100Sstevel@tonic-gate 	cpu_t *cp;
31110Sstevel@tonic-gate 	int ptnrtype;
31120Sstevel@tonic-gate 
31130Sstevel@tonic-gate 	kpreempt_disable();
31140Sstevel@tonic-gate 	if ((cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
31150Sstevel@tonic-gate 	    &ptnrtype)) != NULL) {
31160Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
31170Sstevel@tonic-gate 		    (uint64_t)&disp);
31180Sstevel@tonic-gate 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
31190Sstevel@tonic-gate 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
31200Sstevel@tonic-gate 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
31210Sstevel@tonic-gate 	} else {
31220Sstevel@tonic-gate 		ce_xdiag_lkydrops++;
31230Sstevel@tonic-gate 		if (ncpus > 1)
31240Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
31250Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPTNR);
31260Sstevel@tonic-gate 	}
31270Sstevel@tonic-gate 	kpreempt_enable();
31280Sstevel@tonic-gate 
31290Sstevel@tonic-gate 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
31300Sstevel@tonic-gate 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
31310Sstevel@tonic-gate }
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate /*
31340Sstevel@tonic-gate  * Called from errorq drain code when processing a CE error, both from
31350Sstevel@tonic-gate  * CPU and PCI drain functions.  Decide what further classification actions,
31360Sstevel@tonic-gate  * if any, we will perform.  Perform immediate actions now, and schedule
31370Sstevel@tonic-gate  * delayed actions as required.  Note that we are no longer necessarily running
31380Sstevel@tonic-gate  * on the detecting cpu, and that the async_flt structure will not persist on
31390Sstevel@tonic-gate  * return from this function.
31400Sstevel@tonic-gate  *
31410Sstevel@tonic-gate  * Calls to this function should aim to be self-throttling in some way.  With
31420Sstevel@tonic-gate  * the delayed re-enable of CEEN the absolute rate of calls should not
31430Sstevel@tonic-gate  * be excessive.  Callers should also avoid performing in-depth classification
31440Sstevel@tonic-gate  * for events in pages that are already known to be suspect.
31450Sstevel@tonic-gate  *
31460Sstevel@tonic-gate  * We return nonzero to indicate that the event has been copied and
31470Sstevel@tonic-gate  * recirculated for further testing.  The caller should not log the event
31480Sstevel@tonic-gate  * in this case - it will be logged when further test results are available.
31490Sstevel@tonic-gate  *
31500Sstevel@tonic-gate  * Our possible contexts are that of errorq_drain: below lock level or from
31510Sstevel@tonic-gate  * panic context.  We can assume that the cpu we are running on is online.
31520Sstevel@tonic-gate  */
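31520Sstevel@tonic-gate 
31520Sstevel@tonic-gate /*
31520Sstevel@tonic-gate  * The drain-side caller pattern (see cpu_async_log_err()) is, roughly:
31520Sstevel@tonic-gate  *
31520Sstevel@tonic-gate  *	if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
31520Sstevel@tonic-gate  *	    offsetof(ch_async_flt_t, cmn_asyncflt)))
31520Sstevel@tonic-gate  *		return (0);	(event recirculated; defer logging)
31520Sstevel@tonic-gate  */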
31530Sstevel@tonic-gate 
31550Sstevel@tonic-gate #ifdef DEBUG
31560Sstevel@tonic-gate static int ce_xdiag_forceaction;
31570Sstevel@tonic-gate #endif
31580Sstevel@tonic-gate 
31590Sstevel@tonic-gate int
31600Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
31610Sstevel@tonic-gate     errorq_elem_t *eqep, size_t afltoffset)
31620Sstevel@tonic-gate {
31630Sstevel@tonic-gate 	ce_dispact_t dispact, action;
31640Sstevel@tonic-gate 	cpu_t *cp;
31650Sstevel@tonic-gate 	uchar_t dtcrinfo, disp;
31660Sstevel@tonic-gate 	int ptnrtype;
31670Sstevel@tonic-gate 
31680Sstevel@tonic-gate 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
31690Sstevel@tonic-gate 		ce_xdiag_drops++;
31700Sstevel@tonic-gate 		return (0);
31710Sstevel@tonic-gate 	} else if (!aflt->flt_in_memory) {
31720Sstevel@tonic-gate 		ce_xdiag_drops++;
31730Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
31740Sstevel@tonic-gate 		return (0);
31750Sstevel@tonic-gate 	}
31760Sstevel@tonic-gate 
31770Sstevel@tonic-gate 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
31780Sstevel@tonic-gate 
31790Sstevel@tonic-gate 	/*
31800Sstevel@tonic-gate 	 * Some correctable events are not scrubbed/classified, such as those
31810Sstevel@tonic-gate 	 * noticed at the tail of cpu_deferred_error.  So if there is no
31820Sstevel@tonic-gate 	 * initial detector classification go no further.
31830Sstevel@tonic-gate 	 */
31840Sstevel@tonic-gate 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
31850Sstevel@tonic-gate 		ce_xdiag_drops++;
31860Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
31870Sstevel@tonic-gate 		return (0);
31880Sstevel@tonic-gate 	}
31890Sstevel@tonic-gate 
31900Sstevel@tonic-gate 	dispact = CE_DISPACT(ce_disp_table,
31910Sstevel@tonic-gate 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
31920Sstevel@tonic-gate 	    CE_XDIAG_STATE(dtcrinfo),
31930Sstevel@tonic-gate 	    CE_XDIAG_CE1SEEN(dtcrinfo),
31940Sstevel@tonic-gate 	    CE_XDIAG_CE2SEEN(dtcrinfo));
31950Sstevel@tonic-gate 
31970Sstevel@tonic-gate 	action = CE_ACT(dispact);	/* bad lookup caught below */
31980Sstevel@tonic-gate #ifdef DEBUG
31990Sstevel@tonic-gate 	if (ce_xdiag_forceaction != 0)
32000Sstevel@tonic-gate 		action = ce_xdiag_forceaction;
32010Sstevel@tonic-gate #endif
32020Sstevel@tonic-gate 
32030Sstevel@tonic-gate 	switch (action) {
32040Sstevel@tonic-gate 	case CE_ACT_LKYCHK: {
32050Sstevel@tonic-gate 		caddr_t ndata;
32060Sstevel@tonic-gate 		errorq_elem_t *neqep;
32070Sstevel@tonic-gate 		struct async_flt *ecc;
32080Sstevel@tonic-gate 		ce_lkychk_cb_t *cbargp;
32090Sstevel@tonic-gate 
32100Sstevel@tonic-gate 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
32110Sstevel@tonic-gate 			ce_xdiag_lkydrops++;
32120Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
32130Sstevel@tonic-gate 			    CE_XDIAG_SKIP_DUPFAIL);
32140Sstevel@tonic-gate 			break;
32150Sstevel@tonic-gate 		}
32160Sstevel@tonic-gate 		ecc = (struct async_flt *)(ndata + afltoffset);
32170Sstevel@tonic-gate 
32180Sstevel@tonic-gate 		ASSERT(ecc->flt_class == CPU_FAULT ||
32190Sstevel@tonic-gate 		    ecc->flt_class == BUS_FAULT);
32200Sstevel@tonic-gate 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
32210Sstevel@tonic-gate 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
32220Sstevel@tonic-gate 
32230Sstevel@tonic-gate 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
32240Sstevel@tonic-gate 		cbargp->lkycb_aflt = ecc;
32250Sstevel@tonic-gate 		cbargp->lkycb_eqp = eqp;
32260Sstevel@tonic-gate 		cbargp->lkycb_eqep = neqep;
32270Sstevel@tonic-gate 
32280Sstevel@tonic-gate 		(void) timeout((void (*)(void *))ce_lkychk_cb,
32290Sstevel@tonic-gate 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
32300Sstevel@tonic-gate 		return (1);
32310Sstevel@tonic-gate 	}
32320Sstevel@tonic-gate 
32330Sstevel@tonic-gate 	case CE_ACT_PTNRCHK:
32340Sstevel@tonic-gate 		kpreempt_disable();	/* stop cpu list changing */
32350Sstevel@tonic-gate 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
32360Sstevel@tonic-gate 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
32370Sstevel@tonic-gate 			    (uint64_t)aflt, (uint64_t)&disp);
32380Sstevel@tonic-gate 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
32390Sstevel@tonic-gate 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
32400Sstevel@tonic-gate 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
32410Sstevel@tonic-gate 		} else if (ncpus > 1) {
32420Sstevel@tonic-gate 			ce_xdiag_ptnrdrops++;
32430Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
32440Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPTNR);
32450Sstevel@tonic-gate 		} else {
32460Sstevel@tonic-gate 			ce_xdiag_ptnrdrops++;
32470Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
32480Sstevel@tonic-gate 			    CE_XDIAG_SKIP_UNIPROC);
32490Sstevel@tonic-gate 		}
32500Sstevel@tonic-gate 		kpreempt_enable();
32510Sstevel@tonic-gate 		break;
32520Sstevel@tonic-gate 
32530Sstevel@tonic-gate 	case CE_ACT_DONE:
32540Sstevel@tonic-gate 		break;
32550Sstevel@tonic-gate 
32560Sstevel@tonic-gate 	case CE_ACT(CE_DISP_BAD):
32570Sstevel@tonic-gate 	default:
32580Sstevel@tonic-gate #ifdef DEBUG
32590Sstevel@tonic-gate 		cmn_err(CE_PANIC, "ce_scrub_xdiag_recirc: Bad action '%d'",
32590Sstevel@tonic-gate 		    action);
32600Sstevel@tonic-gate #endif
32610Sstevel@tonic-gate 		ce_xdiag_bad++;
32620Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
32630Sstevel@tonic-gate 		break;
32640Sstevel@tonic-gate 	}
32650Sstevel@tonic-gate 
32660Sstevel@tonic-gate 	return (0);
32670Sstevel@tonic-gate }
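
/*
 * Illustrative sketch of the caller side of the contract above; the
 * names other than ce_scrub_xdiag_recirc() are hypothetical:
 *
 *	if (ce_scrub_xdiag_recirc(aflt, eqp, eqep, afltoffset) != 0)
 *		return;			(recirculated - logged later)
 *	log_the_event(aflt);		(zero return - caller logs now)
 */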
32680Sstevel@tonic-gate 
32690Sstevel@tonic-gate /*
32700Sstevel@tonic-gate  * We route all errors through a single switch statement.
32710Sstevel@tonic-gate  */
32720Sstevel@tonic-gate void
32730Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
32740Sstevel@tonic-gate {
32750Sstevel@tonic-gate 	switch (aflt->flt_class) {
32760Sstevel@tonic-gate 	case CPU_FAULT:
32770Sstevel@tonic-gate 		cpu_ereport_init(aflt);
32780Sstevel@tonic-gate 		if (cpu_async_log_err(aflt, NULL))
32790Sstevel@tonic-gate 			cpu_ereport_post(aflt);
32800Sstevel@tonic-gate 		break;
32810Sstevel@tonic-gate 
32820Sstevel@tonic-gate 	case BUS_FAULT:
32830Sstevel@tonic-gate 		bus_async_log_err(aflt);
32840Sstevel@tonic-gate 		break;
32850Sstevel@tonic-gate 
32860Sstevel@tonic-gate 	default:
32870Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding async error %p with invalid "
32880Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
32890Sstevel@tonic-gate 		return;
32900Sstevel@tonic-gate 	}
32910Sstevel@tonic-gate }
32920Sstevel@tonic-gate 
32930Sstevel@tonic-gate /*
32940Sstevel@tonic-gate  * Routine for panic hook callback from panic_idle().
32950Sstevel@tonic-gate  */
32960Sstevel@tonic-gate void
32970Sstevel@tonic-gate cpu_async_panic_callb(void)
32980Sstevel@tonic-gate {
32990Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
33000Sstevel@tonic-gate 	struct async_flt *aflt;
33010Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
33020Sstevel@tonic-gate 	uint64_t afsr_errs;
33030Sstevel@tonic-gate 
33040Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
33050Sstevel@tonic-gate 
33060Sstevel@tonic-gate 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
33070Sstevel@tonic-gate 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
33080Sstevel@tonic-gate 
33090Sstevel@tonic-gate 	if (afsr_errs) {
33110Sstevel@tonic-gate 		bzero(&ch_flt, sizeof (ch_async_flt_t));
33120Sstevel@tonic-gate 		aflt = (struct async_flt *)&ch_flt;
33130Sstevel@tonic-gate 		aflt->flt_id = gethrtime_waitfree();
33140Sstevel@tonic-gate 		aflt->flt_bus_id = getprocessorid();
33150Sstevel@tonic-gate 		aflt->flt_inst = CPU->cpu_id;
33160Sstevel@tonic-gate 		aflt->flt_stat = cpu_error_regs.afsr;
33170Sstevel@tonic-gate 		aflt->flt_addr = cpu_error_regs.afar;
33180Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_NONE;
33190Sstevel@tonic-gate 		aflt->flt_class = CPU_FAULT;
33200Sstevel@tonic-gate 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
33210Sstevel@tonic-gate 		aflt->flt_panic = 1;
33220Sstevel@tonic-gate 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
33230Sstevel@tonic-gate 		ch_flt.afsr_errs = afsr_errs;
33240Sstevel@tonic-gate #if defined(SERRANO)
33250Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
33260Sstevel@tonic-gate #endif	/* SERRANO */
33270Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
33280Sstevel@tonic-gate 	}
33290Sstevel@tonic-gate }
33300Sstevel@tonic-gate 
33310Sstevel@tonic-gate /*
33320Sstevel@tonic-gate  * Routine to convert a syndrome into a syndrome code.
33330Sstevel@tonic-gate  */
33340Sstevel@tonic-gate static int
33350Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
33360Sstevel@tonic-gate {
33370Sstevel@tonic-gate 	if (synd_status == AFLT_STAT_INVALID)
33380Sstevel@tonic-gate 		return (-1);
33390Sstevel@tonic-gate 
33400Sstevel@tonic-gate 	/*
33410Sstevel@tonic-gate 	 * Use the syndrome to index the appropriate syndrome table,
33420Sstevel@tonic-gate 	 * to get the code indicating which bit(s) is(are) bad.
33430Sstevel@tonic-gate 	 */
33440Sstevel@tonic-gate 	if (afsr_bit &
33450Sstevel@tonic-gate 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
33460Sstevel@tonic-gate 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
33470Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
33480Sstevel@tonic-gate 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
33490Sstevel@tonic-gate 				return (-1);
33500Sstevel@tonic-gate 			else
33510Sstevel@tonic-gate 				return (BPAR0 + synd);
33520Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
33530Sstevel@tonic-gate 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
33540Sstevel@tonic-gate 				return (-1);
33550Sstevel@tonic-gate 			else
33560Sstevel@tonic-gate 				return (mtag_syndrome_tab[synd]);
33570Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
33580Sstevel@tonic-gate 		} else {
33590Sstevel@tonic-gate 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
33600Sstevel@tonic-gate 				return (-1);
33610Sstevel@tonic-gate 			else
33620Sstevel@tonic-gate 				return (ecc_syndrome_tab[synd]);
33630Sstevel@tonic-gate 		}
33640Sstevel@tonic-gate 	} else {
33650Sstevel@tonic-gate 		return (-1);
33660Sstevel@tonic-gate 	}
33670Sstevel@tonic-gate }
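
/*
 * Usage sketch (illustrative): a caller holding a valid e$ syndrome
 * for a correctable error passes the matching AFSR bit so the right
 * table is consulted, e.g.
 *
 *	code = synd_to_synd_code(AFLT_STAT_VALID, flt_synd, C_AFSR_CE);
 *
 * A return of -1 means no single-bit code could be derived; callers
 * such as cpu_get_mem_unum() below treat that as a plain unum lookup.
 */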
33680Sstevel@tonic-gate 
33690Sstevel@tonic-gate /*
33700Sstevel@tonic-gate  * Routine to return a string identifying the physical name
33710Sstevel@tonic-gate  * associated with a memory/cache error.
33720Sstevel@tonic-gate  */
33730Sstevel@tonic-gate int
33740Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
33750Sstevel@tonic-gate     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
33760Sstevel@tonic-gate     ushort_t flt_status, char *buf, int buflen, int *lenp)
33770Sstevel@tonic-gate {
33780Sstevel@tonic-gate 	int synd_code;
33790Sstevel@tonic-gate 	int ret;
33800Sstevel@tonic-gate 
33810Sstevel@tonic-gate 	/*
33820Sstevel@tonic-gate 	 * An AFSR of -1 defaults to a memory syndrome.
33830Sstevel@tonic-gate 	 */
33840Sstevel@tonic-gate 	if (flt_stat == (uint64_t)-1)
33850Sstevel@tonic-gate 		flt_stat = C_AFSR_CE;
33860Sstevel@tonic-gate 
33870Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
33880Sstevel@tonic-gate 
33890Sstevel@tonic-gate 	/*
33900Sstevel@tonic-gate 	 * Syndrome code must be either a single-bit error code
33910Sstevel@tonic-gate 	 * (0...143) or -1 for unum lookup.
33920Sstevel@tonic-gate 	 */
33930Sstevel@tonic-gate 	if (synd_code < 0 || synd_code >= M2)
33940Sstevel@tonic-gate 		synd_code = -1;
33950Sstevel@tonic-gate 	if (&plat_get_mem_unum) {
33960Sstevel@tonic-gate 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
33970Sstevel@tonic-gate 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
33980Sstevel@tonic-gate 			buf[0] = '\0';
33990Sstevel@tonic-gate 			*lenp = 0;
34000Sstevel@tonic-gate 		}
34010Sstevel@tonic-gate 
34020Sstevel@tonic-gate 		return (ret);
34030Sstevel@tonic-gate 	}
34040Sstevel@tonic-gate 
34050Sstevel@tonic-gate 	return (ENOTSUP);
34060Sstevel@tonic-gate }
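
/*
 * The "if (&plat_get_mem_unum)" test above works because the platform
 * hook is a weak symbol: platforms that do not supply it leave the
 * address NULL, so we return ENOTSUP at runtime.  A minimal sketch of
 * the same pattern (plat_example_hook is a hypothetical name):
 *
 *	#pragma weak plat_example_hook
 *	extern int plat_example_hook(int);
 *
 *	if (&plat_example_hook)
 *		ret = plat_example_hook(arg);
 *	else
 *		ret = ENOTSUP;
 */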
34070Sstevel@tonic-gate 
34080Sstevel@tonic-gate /*
34090Sstevel@tonic-gate  * Wrapper for cpu_get_mem_unum() routine that takes an
34100Sstevel@tonic-gate  * async_flt struct rather than explicit arguments.
34110Sstevel@tonic-gate  */
34120Sstevel@tonic-gate int
34130Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
34140Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
34150Sstevel@tonic-gate {
34160Sstevel@tonic-gate 	/*
34170Sstevel@tonic-gate 	 * If we come through here for an IO bus error, aflt->flt_stat will
34180Sstevel@tonic-gate 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
34190Sstevel@tonic-gate 	 * so it will interpret this as a memory error.
34200Sstevel@tonic-gate 	 */
34210Sstevel@tonic-gate 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
34220Sstevel@tonic-gate 	    (aflt->flt_class == BUS_FAULT) ?
34230Sstevel@tonic-gate 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
34240Sstevel@tonic-gate 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
34250Sstevel@tonic-gate 	    aflt->flt_status, buf, buflen, lenp));
34260Sstevel@tonic-gate }
34270Sstevel@tonic-gate 
34280Sstevel@tonic-gate /*
34290Sstevel@tonic-gate  * This routine is a more generic interface to cpu_get_mem_unum()
34300Sstevel@tonic-gate  * that may be used by other modules (e.g. mm).
34310Sstevel@tonic-gate  */
34320Sstevel@tonic-gate int
34330Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
34340Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
34350Sstevel@tonic-gate {
34360Sstevel@tonic-gate 	int synd_status, flt_in_memory, ret;
34370Sstevel@tonic-gate 	ushort_t flt_status = 0;
34380Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
34390Sstevel@tonic-gate 
34400Sstevel@tonic-gate 	/*
34410Sstevel@tonic-gate 	 * Check for an invalid address.
34420Sstevel@tonic-gate 	 */
34430Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
34440Sstevel@tonic-gate 		return (ENXIO);
34450Sstevel@tonic-gate 
34460Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
34470Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
34480Sstevel@tonic-gate 	else
34490Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
34500Sstevel@tonic-gate 
34510Sstevel@tonic-gate 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
34520Sstevel@tonic-gate 	    pf_is_memory(afar >> MMU_PAGESHIFT);
34530Sstevel@tonic-gate 
34540Sstevel@tonic-gate 	/*
34550Sstevel@tonic-gate 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
34560Sstevel@tonic-gate 	 * For Panther, L2$ is not external, so we don't want to
34570Sstevel@tonic-gate 	 * generate an E$ unum for those errors.
34580Sstevel@tonic-gate 	 */
34590Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
34600Sstevel@tonic-gate 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
34610Sstevel@tonic-gate 			flt_status |= ECC_ECACHE;
34620Sstevel@tonic-gate 	} else {
34630Sstevel@tonic-gate 		if (*afsr & C_AFSR_ECACHE)
34640Sstevel@tonic-gate 			flt_status |= ECC_ECACHE;
34650Sstevel@tonic-gate 	}
34660Sstevel@tonic-gate 
34670Sstevel@tonic-gate 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
34680Sstevel@tonic-gate 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
34690Sstevel@tonic-gate 	if (ret != 0)
34700Sstevel@tonic-gate 		return (ret);
34710Sstevel@tonic-gate 
34720Sstevel@tonic-gate 	if (*lenp >= buflen)
34730Sstevel@tonic-gate 		return (ENAMETOOLONG);
34740Sstevel@tonic-gate 
34750Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
34760Sstevel@tonic-gate 
34770Sstevel@tonic-gate 	return (0);
34780Sstevel@tonic-gate }
34790Sstevel@tonic-gate 
34800Sstevel@tonic-gate /*
34810Sstevel@tonic-gate  * Routine to return memory information associated
34820Sstevel@tonic-gate  * with a physical address and syndrome.
34830Sstevel@tonic-gate  */
34840Sstevel@tonic-gate int
34850Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar,
34860Sstevel@tonic-gate     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
34870Sstevel@tonic-gate     int *segsp, int *banksp, int *mcidp)
34880Sstevel@tonic-gate {
34890Sstevel@tonic-gate 	int synd_status, synd_code;
34900Sstevel@tonic-gate 
34910Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
34920Sstevel@tonic-gate 		return (ENXIO);
34930Sstevel@tonic-gate 
34940Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
34950Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
34960Sstevel@tonic-gate 	else
34970Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
34980Sstevel@tonic-gate 
34990Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
35000Sstevel@tonic-gate 
35010Sstevel@tonic-gate 	if (p2get_mem_info != NULL)
35020Sstevel@tonic-gate 		return ((p2get_mem_info)(synd_code, afar,
35030Sstevel@tonic-gate 			mem_sizep, seg_sizep, bank_sizep,
35040Sstevel@tonic-gate 			segsp, banksp, mcidp));
35050Sstevel@tonic-gate 	else
35060Sstevel@tonic-gate 		return (ENOTSUP);
35070Sstevel@tonic-gate }
35080Sstevel@tonic-gate 
35090Sstevel@tonic-gate /*
35100Sstevel@tonic-gate  * Routine to return a string identifying the physical
35110Sstevel@tonic-gate  * name associated with a cpuid.
35120Sstevel@tonic-gate  */
35130Sstevel@tonic-gate int
35140Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
35150Sstevel@tonic-gate {
35160Sstevel@tonic-gate 	int ret;
35170Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
35180Sstevel@tonic-gate 
35190Sstevel@tonic-gate 	if (&plat_get_cpu_unum) {
35200Sstevel@tonic-gate 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
35210Sstevel@tonic-gate 		    != 0)
35220Sstevel@tonic-gate 			return (ret);
35230Sstevel@tonic-gate 	} else {
35240Sstevel@tonic-gate 		return (ENOTSUP);
35250Sstevel@tonic-gate 	}
35260Sstevel@tonic-gate 
35270Sstevel@tonic-gate 	if (*lenp >= buflen)
35280Sstevel@tonic-gate 		return (ENAMETOOLONG);
35290Sstevel@tonic-gate 
35300Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
35310Sstevel@tonic-gate 
35320Sstevel@tonic-gate 	return (0);
35330Sstevel@tonic-gate }
35340Sstevel@tonic-gate 
35350Sstevel@tonic-gate /*
35360Sstevel@tonic-gate  * This routine exports the name buffer size.
35370Sstevel@tonic-gate  */
35380Sstevel@tonic-gate size_t
35390Sstevel@tonic-gate cpu_get_name_bufsize(void)
35400Sstevel@tonic-gate {
35410Sstevel@tonic-gate 	return (UNUM_NAMLEN);
35420Sstevel@tonic-gate }
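
/*
 * Illustrative pairing with cpu_get_cpu_unum() (sketch only):
 *
 *	int len;
 *	char *buf = kmem_alloc(cpu_get_name_bufsize(), KM_SLEEP);
 *	(void) cpu_get_cpu_unum(cpuid, buf, cpu_get_name_bufsize(), &len);
 *
 * Sizing the buffer this way matches the UNUM_NAMLEN staging buffer
 * that cpu_get_cpu_unum() uses internally.
 */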
35430Sstevel@tonic-gate 
35440Sstevel@tonic-gate /*
35450Sstevel@tonic-gate  * Historical function, apparently not used.
35460Sstevel@tonic-gate  */
35470Sstevel@tonic-gate /* ARGSUSED */
35480Sstevel@tonic-gate void
35490Sstevel@tonic-gate cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
35500Sstevel@tonic-gate {}
35510Sstevel@tonic-gate 
35520Sstevel@tonic-gate /*
35530Sstevel@tonic-gate  * Historical function, only called for SBus errors in debugging.
35540Sstevel@tonic-gate  */
35550Sstevel@tonic-gate /*ARGSUSED*/
35560Sstevel@tonic-gate void
35570Sstevel@tonic-gate read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
35580Sstevel@tonic-gate {}
35590Sstevel@tonic-gate 
35600Sstevel@tonic-gate /*
35610Sstevel@tonic-gate  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
35620Sstevel@tonic-gate  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
35630Sstevel@tonic-gate  * an async fault structure argument is passed in, the captured error state
35640Sstevel@tonic-gate  * (AFSR, AFAR) info will be returned in the structure.
35650Sstevel@tonic-gate  */
35660Sstevel@tonic-gate int
35670Sstevel@tonic-gate clear_errors(ch_async_flt_t *ch_flt)
35680Sstevel@tonic-gate {
35690Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
35700Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
35710Sstevel@tonic-gate 
35720Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
35730Sstevel@tonic-gate 
35740Sstevel@tonic-gate 	if (ch_flt != NULL) {
35750Sstevel@tonic-gate 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
35760Sstevel@tonic-gate 		aflt->flt_addr = cpu_error_regs.afar;
35770Sstevel@tonic-gate 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
35780Sstevel@tonic-gate 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
35790Sstevel@tonic-gate 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
35800Sstevel@tonic-gate #if defined(SERRANO)
35810Sstevel@tonic-gate 		ch_flt->afar2 = cpu_error_regs.afar2;
35820Sstevel@tonic-gate #endif	/* SERRANO */
35830Sstevel@tonic-gate 	}
35840Sstevel@tonic-gate 
35850Sstevel@tonic-gate 	set_cpu_error_state(&cpu_error_regs);
35860Sstevel@tonic-gate 
35870Sstevel@tonic-gate 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
35880Sstevel@tonic-gate 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
35890Sstevel@tonic-gate }
35900Sstevel@tonic-gate 
35910Sstevel@tonic-gate /*
35920Sstevel@tonic-gate  * Clear any AFSR error bits, and check for persistence.
35930Sstevel@tonic-gate  *
35940Sstevel@tonic-gate  * It would be desirable to also insist that syndrome match.  PCI handling
35950Sstevel@tonic-gate  * has already filled flt_synd.  For errors trapped by CPU we only fill
35960Sstevel@tonic-gate  * flt_synd when we queue the event, so we do not have a valid flt_synd
35970Sstevel@tonic-gate  * during initial classification (it is valid if we're called as part of
35980Sstevel@tonic-gate  * subsequent low-pil additional classification attempts).  We could try
35990Sstevel@tonic-gate  * to determine which syndrome to use: we know we're only called for
36000Sstevel@tonic-gate  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
36010Sstevel@tonic-gate  * would be esynd/none and esynd/msynd, respectively.  If that is
36020Sstevel@tonic-gate  * implemented then what do we do in the case that we do experience an
36030Sstevel@tonic-gate  * error on the same afar but with different syndrome?  At the very least
36040Sstevel@tonic-gate  * we should count such occurrences.  Anyway, for now, we'll leave it as
36050Sstevel@tonic-gate  * it has been for ages.
36060Sstevel@tonic-gate  */
36070Sstevel@tonic-gate static int
36080Sstevel@tonic-gate clear_ecc(struct async_flt *aflt)
36090Sstevel@tonic-gate {
36100Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
36110Sstevel@tonic-gate 
36120Sstevel@tonic-gate 	/*
36130Sstevel@tonic-gate 	 * Snapshot the AFSR and AFAR and clear any errors
36140Sstevel@tonic-gate 	 */
36150Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
36160Sstevel@tonic-gate 	set_cpu_error_state(&cpu_error_regs);
36170Sstevel@tonic-gate 
36180Sstevel@tonic-gate 	/*
36190Sstevel@tonic-gate 	 * If any of the same memory access error bits are still on and
36200Sstevel@tonic-gate 	 * the AFAR matches, return that the error is persistent.
36210Sstevel@tonic-gate 	 */
36220Sstevel@tonic-gate 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
36230Sstevel@tonic-gate 	    cpu_error_regs.afar == aflt->flt_addr);
36240Sstevel@tonic-gate }
36250Sstevel@tonic-gate 
36260Sstevel@tonic-gate /*
36270Sstevel@tonic-gate  * Turn off all cpu error detection, normally only used for panics.
36280Sstevel@tonic-gate  */
36290Sstevel@tonic-gate void
36300Sstevel@tonic-gate cpu_disable_errors(void)
36310Sstevel@tonic-gate {
36320Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3633*960Srscott 
3634*960Srscott 	/*
3635*960Srscott 	 * With error detection now turned off, check the other cpus
3636*960Srscott 	 * logout areas for any unlogged errors.
3637*960Srscott 	 */
3638*960Srscott 	if (enable_check_other_cpus_logout) {
3639*960Srscott 		cpu_check_other_cpus_logout();
3640*960Srscott 		/*
3641*960Srscott 		 * Make a second pass over the logout areas, in case
3642*960Srscott 		 * there is a failing CPU in an error-trap loop which
3643*960Srscott 		 * will write to the logout area once it is emptied.
3644*960Srscott 		 */
3645*960Srscott 		cpu_check_other_cpus_logout();
3646*960Srscott 	}
36470Sstevel@tonic-gate }
36480Sstevel@tonic-gate 
36490Sstevel@tonic-gate /*
36500Sstevel@tonic-gate  * Enable errors.
36510Sstevel@tonic-gate  */
36520Sstevel@tonic-gate void
36530Sstevel@tonic-gate cpu_enable_errors(void)
36540Sstevel@tonic-gate {
36550Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
36560Sstevel@tonic-gate }
36570Sstevel@tonic-gate 
36580Sstevel@tonic-gate /*
36590Sstevel@tonic-gate  * Flush the entire ecache using displacement flush by reading through a
36600Sstevel@tonic-gate  * physical address range twice as large as the Ecache.
36610Sstevel@tonic-gate  */
36620Sstevel@tonic-gate void
36630Sstevel@tonic-gate cpu_flush_ecache(void)
36640Sstevel@tonic-gate {
36650Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
36660Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
36670Sstevel@tonic-gate }
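
/*
 * Sketch of the displacement-flush idea (illustrative C only; the
 * actual work is done by the flush_ecache() primitive, and the
 * cacheable physical load is shown here as a hypothetical ldphys()):
 *
 *	for (off = 0; off < 2 * ec_size; off += ec_linesize)
 *		(void) ldphys(flushaddr + off);
 *
 * Reading twice the E$ size of cacheable data displaces every line in
 * every way regardless of the replacement state.
 */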
36680Sstevel@tonic-gate 
36690Sstevel@tonic-gate /*
36700Sstevel@tonic-gate  * Return CPU E$ set size - E$ size divided by the associativity.
36710Sstevel@tonic-gate  * We use this function in places where the CPU_PRIVATE ptr may not be
36720Sstevel@tonic-gate  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
36730Sstevel@tonic-gate  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
36740Sstevel@tonic-gate  * up before the kernel switches from OBP's to the kernel's trap table, so
36750Sstevel@tonic-gate  * we don't have to worry about cpunodes being uninitialized.
36760Sstevel@tonic-gate  */
36770Sstevel@tonic-gate int
36780Sstevel@tonic-gate cpu_ecache_set_size(struct cpu *cp)
36790Sstevel@tonic-gate {
36800Sstevel@tonic-gate 	if (CPU_PRIVATE(cp))
36810Sstevel@tonic-gate 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
36820Sstevel@tonic-gate 
36830Sstevel@tonic-gate 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
36840Sstevel@tonic-gate }
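
/*
 * Worked example of the computation above: an 8MB E$ organized as
 * 2-way set-associative has a set size of 8MB / 2 = 4MB; the set size
 * is also the per-way stride used by the diagnostic routines below.
 */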
36850Sstevel@tonic-gate 
36860Sstevel@tonic-gate /*
36870Sstevel@tonic-gate  * Flush Ecache line.
36880Sstevel@tonic-gate  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
36890Sstevel@tonic-gate  * Uses normal displacement flush for Cheetah.
36900Sstevel@tonic-gate  */
36910Sstevel@tonic-gate static void
36920Sstevel@tonic-gate cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
36930Sstevel@tonic-gate {
36940Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
36950Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
36960Sstevel@tonic-gate 
36970Sstevel@tonic-gate 	ecache_flush_line(aflt->flt_addr, ec_set_size);
36980Sstevel@tonic-gate }
36990Sstevel@tonic-gate 
37000Sstevel@tonic-gate /*
37010Sstevel@tonic-gate  * Scrub physical address.
37020Sstevel@tonic-gate  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
37030Sstevel@tonic-gate  * Ecache or direct-mapped Ecache.
37040Sstevel@tonic-gate  */
37050Sstevel@tonic-gate static void
37060Sstevel@tonic-gate cpu_scrubphys(struct async_flt *aflt)
37070Sstevel@tonic-gate {
37080Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
37090Sstevel@tonic-gate 
37100Sstevel@tonic-gate 	scrubphys(aflt->flt_addr, ec_set_size);
37110Sstevel@tonic-gate }
37120Sstevel@tonic-gate 
37130Sstevel@tonic-gate /*
37140Sstevel@tonic-gate  * Clear physical address.
37150Sstevel@tonic-gate  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
37160Sstevel@tonic-gate  * Ecache or direct-mapped Ecache.
37170Sstevel@tonic-gate  */
37180Sstevel@tonic-gate void
37190Sstevel@tonic-gate cpu_clearphys(struct async_flt *aflt)
37200Sstevel@tonic-gate {
37210Sstevel@tonic-gate 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
37220Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
37230Sstevel@tonic-gate 
37250Sstevel@tonic-gate 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
37260Sstevel@tonic-gate }
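
/*
 * Worked example of the P2ALIGN() rounding above: with a 64-byte
 * linesize, P2ALIGN(0x12345, 64) = 0x12345 & ~63 = 0x12340, i.e. the
 * fault address rounded down to the start of its cache line.
 */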
37270Sstevel@tonic-gate 
37280Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
37290Sstevel@tonic-gate /*
37300Sstevel@tonic-gate  * Check for a matching valid line in all the sets.
37310Sstevel@tonic-gate  * If found, return set# + 1. Otherwise return 0.
37320Sstevel@tonic-gate  */
37330Sstevel@tonic-gate static int
37340Sstevel@tonic-gate cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
37350Sstevel@tonic-gate {
37360Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
37370Sstevel@tonic-gate 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
37380Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
37390Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
37400Sstevel@tonic-gate 	int nway = cpu_ecache_nway();
37410Sstevel@tonic-gate 	int i;
37420Sstevel@tonic-gate 
37430Sstevel@tonic-gate 	for (i = 0; i < nway; i++, ecp++) {
37440Sstevel@tonic-gate 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
37450Sstevel@tonic-gate 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
37460Sstevel@tonic-gate 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
37470Sstevel@tonic-gate 			return (i+1);
37480Sstevel@tonic-gate 	}
37490Sstevel@tonic-gate 	return (0);
37500Sstevel@tonic-gate }
37510Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
37520Sstevel@tonic-gate 
37530Sstevel@tonic-gate /*
37540Sstevel@tonic-gate  * Check whether a line in the given logout info matches the specified
37550Sstevel@tonic-gate  * fault address.  If reqval is set then the line must not be Invalid.
37560Sstevel@tonic-gate  * Returns 0 on failure;  on success (way + 1) is returned and *level is
37570Sstevel@tonic-gate  * set to 2 for l2$ or 3 for l3$.
37580Sstevel@tonic-gate  */
37590Sstevel@tonic-gate static int
37600Sstevel@tonic-gate cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
37610Sstevel@tonic-gate {
37620Sstevel@tonic-gate 	ch_diag_data_t *cdp = data;
37630Sstevel@tonic-gate 	ch_ec_data_t *ecp;
37640Sstevel@tonic-gate 	int totalsize, ec_set_size;
37650Sstevel@tonic-gate 	int i, ways;
37660Sstevel@tonic-gate 	int match = 0;
37670Sstevel@tonic-gate 	int tagvalid;
37680Sstevel@tonic-gate 	uint64_t addr, tagpa;
37690Sstevel@tonic-gate 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
37700Sstevel@tonic-gate 
37710Sstevel@tonic-gate 	/*
37720Sstevel@tonic-gate 	 * Check the l2$ logout data
37730Sstevel@tonic-gate 	 */
37740Sstevel@tonic-gate 	if (ispanther) {
37750Sstevel@tonic-gate 		ecp = &cdp->chd_l2_data[0];
37760Sstevel@tonic-gate 		ec_set_size = PN_L2_SET_SIZE;
37770Sstevel@tonic-gate 		ways = PN_L2_NWAYS;
37780Sstevel@tonic-gate 	} else {
37790Sstevel@tonic-gate 		ecp = &cdp->chd_ec_data[0];
37800Sstevel@tonic-gate 		ec_set_size = cpu_ecache_set_size(CPU);
37810Sstevel@tonic-gate 		ways = cpu_ecache_nway();
37820Sstevel@tonic-gate 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
37830Sstevel@tonic-gate 	}
37840Sstevel@tonic-gate 	/* remove low-order fault address bits that are not in the PA tag */
37850Sstevel@tonic-gate 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
37860Sstevel@tonic-gate 	for (i = 0; i < ways; i++, ecp++) {
37870Sstevel@tonic-gate 		if (ispanther) {
37880Sstevel@tonic-gate 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
37890Sstevel@tonic-gate 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
37900Sstevel@tonic-gate 		} else {
37910Sstevel@tonic-gate 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
37920Sstevel@tonic-gate 			tagvalid = !cpu_ectag_line_invalid(totalsize,
37930Sstevel@tonic-gate 			    ecp->ec_tag);
37940Sstevel@tonic-gate 		}
37950Sstevel@tonic-gate 		if (tagpa == addr && (!reqval || tagvalid)) {
37960Sstevel@tonic-gate 			match = i + 1;
37970Sstevel@tonic-gate 			*level = 2;
37980Sstevel@tonic-gate 			break;
37990Sstevel@tonic-gate 		}
38000Sstevel@tonic-gate 	}
38010Sstevel@tonic-gate 
38020Sstevel@tonic-gate 	if (match || !ispanther)
38030Sstevel@tonic-gate 		return (match);
38040Sstevel@tonic-gate 
38050Sstevel@tonic-gate 	/* For Panther we also check the l3$ */
38060Sstevel@tonic-gate 	ecp = &cdp->chd_ec_data[0];
38070Sstevel@tonic-gate 	ec_set_size = PN_L3_SET_SIZE;
38080Sstevel@tonic-gate 	ways = PN_L3_NWAYS;
38090Sstevel@tonic-gate 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
38100Sstevel@tonic-gate 
38110Sstevel@tonic-gate 	for (i = 0; i < ways; i++, ecp++) {
38120Sstevel@tonic-gate 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
38130Sstevel@tonic-gate 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
38140Sstevel@tonic-gate 			match = i + 1;
38150Sstevel@tonic-gate 			*level = 3;
38160Sstevel@tonic-gate 			break;
38170Sstevel@tonic-gate 		}
38180Sstevel@tonic-gate 	}
38190Sstevel@tonic-gate 
38200Sstevel@tonic-gate 	return (match);
38210Sstevel@tonic-gate }
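
/*
 * Note on the tag compare above (illustrative numbers): P2ALIGN(
 * C_AFAR_PA, ec_set_size) clears the index/offset bits of the PA
 * mask - for a 1MB set size, bits 19:0 - so only the tag portion of
 * the fault address is compared with the PA decoded from each way's
 * ec_tag.
 */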
38220Sstevel@tonic-gate 
38230Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
38240Sstevel@tonic-gate /*
38250Sstevel@tonic-gate  * Record information related to the source of a Dcache Parity Error.
38260Sstevel@tonic-gate  */
38270Sstevel@tonic-gate static void
38280Sstevel@tonic-gate cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
38290Sstevel@tonic-gate {
38300Sstevel@tonic-gate 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
38310Sstevel@tonic-gate 	int index;
38320Sstevel@tonic-gate 
38330Sstevel@tonic-gate 	/*
38340Sstevel@tonic-gate 	 * Since instruction decode cannot be done at high PIL,
38350Sstevel@tonic-gate 	 * just examine the entire Dcache to locate the error.
38360Sstevel@tonic-gate 	 */
38370Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
38380Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = -1;
38390Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = -1;
38400Sstevel@tonic-gate 	}
38410Sstevel@tonic-gate 	for (index = 0; index < dc_set_size; index += dcache_linesize)
38420Sstevel@tonic-gate 		cpu_dcache_parity_check(ch_flt, index);
38430Sstevel@tonic-gate }
38440Sstevel@tonic-gate 
38450Sstevel@tonic-gate /*
38460Sstevel@tonic-gate  * Check all ways of the Dcache at a specified index for good parity.
38470Sstevel@tonic-gate  */
38480Sstevel@tonic-gate static void
38490Sstevel@tonic-gate cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
38500Sstevel@tonic-gate {
38510Sstevel@tonic-gate 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
38520Sstevel@tonic-gate 	uint64_t parity_bits, pbits, data_word;
38530Sstevel@tonic-gate 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
38540Sstevel@tonic-gate 	int way, word, data_byte;
38550Sstevel@tonic-gate 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
38560Sstevel@tonic-gate 	ch_dc_data_t tmp_dcp;
38570Sstevel@tonic-gate 
38580Sstevel@tonic-gate 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
38590Sstevel@tonic-gate 		/*
38600Sstevel@tonic-gate 		 * Perform diagnostic read.
38610Sstevel@tonic-gate 		 */
38620Sstevel@tonic-gate 		get_dcache_dtag(index + way * dc_set_size,
38630Sstevel@tonic-gate 				(uint64_t *)&tmp_dcp);
38640Sstevel@tonic-gate 
38650Sstevel@tonic-gate 		/*
38660Sstevel@tonic-gate 		 * Check tag for even parity.
38670Sstevel@tonic-gate 		 * Sum of 1 bits (including parity bit) should be even.
38680Sstevel@tonic-gate 		 */
38690Sstevel@tonic-gate 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
38700Sstevel@tonic-gate 			/*
38710Sstevel@tonic-gate 			 * If this is the first error, log detailed information
38720Sstevel@tonic-gate 			 * about it and check the snoop tag. Otherwise just
38730Sstevel@tonic-gate 			 * record the fact that we found another error.
38740Sstevel@tonic-gate 			 */
38750Sstevel@tonic-gate 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
38760Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_way = way;
38770Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_cache =
38780Sstevel@tonic-gate 				    CPU_DC_PARITY;
38790Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
38800Sstevel@tonic-gate 
38810Sstevel@tonic-gate 				if (popc64(tmp_dcp.dc_sntag &
38820Sstevel@tonic-gate 						CHP_DCSNTAG_PARMASK) & 1) {
38830Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_tag |=
38840Sstevel@tonic-gate 								CHP_DC_SNTAG;
38850Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_lcnt++;
38860Sstevel@tonic-gate 				}
38870Sstevel@tonic-gate 
38880Sstevel@tonic-gate 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
38890Sstevel@tonic-gate 			}
38900Sstevel@tonic-gate 
38910Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_lcnt++;
38920Sstevel@tonic-gate 		}
38930Sstevel@tonic-gate 
38940Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
38950Sstevel@tonic-gate 			/*
38960Sstevel@tonic-gate 			 * Panther has more parity bits than the other
38970Sstevel@tonic-gate 			 * processors for covering dcache data, so each
38980Sstevel@tonic-gate 			 * byte of data in each word has its own parity bit.
38990Sstevel@tonic-gate 			 */
39000Sstevel@tonic-gate 			parity_bits = tmp_dcp.dc_pn_data_parity;
39010Sstevel@tonic-gate 			for (word = 0; word < 4; word++) {
39020Sstevel@tonic-gate 				data_word = tmp_dcp.dc_data[word];
39030Sstevel@tonic-gate 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
39040Sstevel@tonic-gate 				for (data_byte = 0; data_byte < 8;
39050Sstevel@tonic-gate 				    data_byte++) {
39060Sstevel@tonic-gate 					if (((popc64(data_word &
39070Sstevel@tonic-gate 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
39080Sstevel@tonic-gate 					    (pbits & 1)) {
39090Sstevel@tonic-gate 						cpu_record_dc_data_parity(
39100Sstevel@tonic-gate 						ch_flt, dcp, &tmp_dcp, way,
39110Sstevel@tonic-gate 						word);
39120Sstevel@tonic-gate 					}
39130Sstevel@tonic-gate 					pbits >>= 1;
39140Sstevel@tonic-gate 					data_word >>= 8;
39150Sstevel@tonic-gate 				}
39160Sstevel@tonic-gate 				parity_bits >>= 8;
39170Sstevel@tonic-gate 			}
39180Sstevel@tonic-gate 		} else {
39190Sstevel@tonic-gate 			/*
39200Sstevel@tonic-gate 			 * Check data array for even parity.
39210Sstevel@tonic-gate 			 * The 8 parity bits are grouped into 4 pairs each
39220Sstevel@tonic-gate 			 * of which covers a 64-bit word.  The endianness is
39230Sstevel@tonic-gate 			 * reversed -- the low-order parity bits cover the
39240Sstevel@tonic-gate 			 * high-order data words.
39250Sstevel@tonic-gate 			 */
39260Sstevel@tonic-gate 			parity_bits = tmp_dcp.dc_utag >> 8;
39270Sstevel@tonic-gate 			for (word = 0; word < 4; word++) {
39280Sstevel@tonic-gate 				pbits = (parity_bits >> (6 - word * 2)) & 3;
39290Sstevel@tonic-gate 				if ((popc64(tmp_dcp.dc_data[word]) +
39300Sstevel@tonic-gate 				    parity_bits_popc[pbits]) & 1) {
39310Sstevel@tonic-gate 					cpu_record_dc_data_parity(ch_flt, dcp,
39320Sstevel@tonic-gate 					    &tmp_dcp, way, word);
39330Sstevel@tonic-gate 				}
39340Sstevel@tonic-gate 			}
39350Sstevel@tonic-gate 		}
39360Sstevel@tonic-gate 	}
39370Sstevel@tonic-gate }
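
/*
 * Worked example for the non-Panther data check above: the 8 parity
 * bits come from dc_utag >> 8 and are reversed with respect to the
 * data words, so word 0 extracts its pair with (parity_bits >> 6) & 3
 * and word 3 with (parity_bits >> 0) & 3.  parity_bits_popc[] folds
 * each pair to its own parity; e.g. pbits = 3 contributes 0, so that
 * word passes only if popc64() of its data is even.
 */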
39380Sstevel@tonic-gate 
39390Sstevel@tonic-gate static void
39400Sstevel@tonic-gate cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
39410Sstevel@tonic-gate     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
39420Sstevel@tonic-gate {
39430Sstevel@tonic-gate 	/*
39440Sstevel@tonic-gate 	 * If this is the first error, log detailed information about it.
39450Sstevel@tonic-gate 	 * Otherwise just record the fact that we found another error.
39460Sstevel@tonic-gate 	 */
39470Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
39480Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = way;
39490Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
39500Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = word * 8;
39510Sstevel@tonic-gate 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
39520Sstevel@tonic-gate 	}
39530Sstevel@tonic-gate 	ch_flt->parity_data.dpe.cpl_lcnt++;
39540Sstevel@tonic-gate }
39550Sstevel@tonic-gate 
39560Sstevel@tonic-gate /*
39570Sstevel@tonic-gate  * Record information related to the source of an Icache Parity Error.
39580Sstevel@tonic-gate  *
39590Sstevel@tonic-gate  * Called with the Icache disabled so any diagnostic accesses are safe.
39600Sstevel@tonic-gate  */
39610Sstevel@tonic-gate static void
39620Sstevel@tonic-gate cpu_icache_parity_info(ch_async_flt_t *ch_flt)
39630Sstevel@tonic-gate {
39640Sstevel@tonic-gate 	int	ic_set_size;
39650Sstevel@tonic-gate 	int	ic_linesize;
39660Sstevel@tonic-gate 	int	index;
39670Sstevel@tonic-gate 
39680Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU)) {
39690Sstevel@tonic-gate 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
39700Sstevel@tonic-gate 		    CH_ICACHE_NWAY;
39710Sstevel@tonic-gate 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
39720Sstevel@tonic-gate 	} else {
39730Sstevel@tonic-gate 		ic_set_size = icache_size / CH_ICACHE_NWAY;
39740Sstevel@tonic-gate 		ic_linesize = icache_linesize;
39750Sstevel@tonic-gate 	}
39760Sstevel@tonic-gate 
39770Sstevel@tonic-gate 	ch_flt->parity_data.ipe.cpl_way = -1;
39780Sstevel@tonic-gate 	ch_flt->parity_data.ipe.cpl_off = -1;
39790Sstevel@tonic-gate 
39800Sstevel@tonic-gate 	for (index = 0; index < ic_set_size; index += ic_linesize)
39810Sstevel@tonic-gate 		cpu_icache_parity_check(ch_flt, index);
39820Sstevel@tonic-gate }
39830Sstevel@tonic-gate 
39840Sstevel@tonic-gate /*
39850Sstevel@tonic-gate  * Check all ways of the Icache at a specified index for good parity.
39860Sstevel@tonic-gate  */
39870Sstevel@tonic-gate static void
39880Sstevel@tonic-gate cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
39890Sstevel@tonic-gate {
39900Sstevel@tonic-gate 	uint64_t parmask, pn_inst_parity;
39910Sstevel@tonic-gate 	int ic_set_size;
39920Sstevel@tonic-gate 	int ic_linesize;
39930Sstevel@tonic-gate 	int flt_index, way, instr, num_instr;
39940Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
39950Sstevel@tonic-gate 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
39960Sstevel@tonic-gate 	ch_ic_data_t tmp_icp;
39970Sstevel@tonic-gate 
39980Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU)) {
39990Sstevel@tonic-gate 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
40000Sstevel@tonic-gate 		    CH_ICACHE_NWAY;
40010Sstevel@tonic-gate 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
40020Sstevel@tonic-gate 	} else {
40030Sstevel@tonic-gate 		ic_set_size = icache_size / CH_ICACHE_NWAY;
40040Sstevel@tonic-gate 		ic_linesize = icache_linesize;
40050Sstevel@tonic-gate 	}
40060Sstevel@tonic-gate 
40070Sstevel@tonic-gate 	/*
40080Sstevel@tonic-gate 	 * Panther has twice as many instructions per icache line and the
40090Sstevel@tonic-gate 	 * instruction parity bit is in a different location.
40100Sstevel@tonic-gate 	 */
40110Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
40120Sstevel@tonic-gate 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
40130Sstevel@tonic-gate 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
40140Sstevel@tonic-gate 	} else {
40150Sstevel@tonic-gate 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
40160Sstevel@tonic-gate 		pn_inst_parity = 0;
40170Sstevel@tonic-gate 	}
40180Sstevel@tonic-gate 
40190Sstevel@tonic-gate 	/*
40200Sstevel@tonic-gate 	 * Index at which we expect to find the parity error.
40210Sstevel@tonic-gate 	 */
40220Sstevel@tonic-gate 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
40230Sstevel@tonic-gate 
40240Sstevel@tonic-gate 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
40250Sstevel@tonic-gate 		/*
40260Sstevel@tonic-gate 		 * Diagnostic reads expect address argument in ASI format.
40270Sstevel@tonic-gate 		 */
40280Sstevel@tonic-gate 		get_icache_dtag(2 * (index + way * ic_set_size),
40290Sstevel@tonic-gate 				(uint64_t *)&tmp_icp);
40300Sstevel@tonic-gate 
40310Sstevel@tonic-gate 		/*
40320Sstevel@tonic-gate 		 * If this is the index in which we expect to find the
40330Sstevel@tonic-gate 		 * error, log detailed information about each of the ways.
40340Sstevel@tonic-gate 		 * This information will be displayed later if we can't
40350Sstevel@tonic-gate 		 * determine the exact way in which the error is located.
40360Sstevel@tonic-gate 		 */
40370Sstevel@tonic-gate 		if (flt_index == index)
40380Sstevel@tonic-gate 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
40390Sstevel@tonic-gate 
40400Sstevel@tonic-gate 		/*
40410Sstevel@tonic-gate 		 * Check tag for even parity.
40420Sstevel@tonic-gate 		 * Sum of 1 bits (including parity bit) should be even.
40430Sstevel@tonic-gate 		 */
40440Sstevel@tonic-gate 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
40450Sstevel@tonic-gate 			/*
40460Sstevel@tonic-gate 			 * If this way is the one in which we expected
40470Sstevel@tonic-gate 			 * to find the error, record the way and check the
40480Sstevel@tonic-gate 			 * snoop tag. Otherwise just record the fact we
40490Sstevel@tonic-gate 			 * found another error.
40500Sstevel@tonic-gate 			 */
40510Sstevel@tonic-gate 			if (flt_index == index) {
40520Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_way = way;
40530Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
40540Sstevel@tonic-gate 
40550Sstevel@tonic-gate 				if (popc64(tmp_icp.ic_sntag &
40560Sstevel@tonic-gate 						CHP_ICSNTAG_PARMASK) & 1) {
40570Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_tag |=
40580Sstevel@tonic-gate 								CHP_IC_SNTAG;
40590Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_lcnt++;
40600Sstevel@tonic-gate 				}
40610Sstevel@tonic-gate 
40620Sstevel@tonic-gate 			}
40630Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_lcnt++;
40640Sstevel@tonic-gate 			continue;
40650Sstevel@tonic-gate 		}
40660Sstevel@tonic-gate 
40670Sstevel@tonic-gate 		/*
40680Sstevel@tonic-gate 		 * Check instruction data for even parity.
40690Sstevel@tonic-gate 		 * Bits participating in parity differ for PC-relative
40700Sstevel@tonic-gate 		 * versus non-PC-relative instructions.
40710Sstevel@tonic-gate 		 */
40720Sstevel@tonic-gate 		for (instr = 0; instr < num_instr; instr++) {
40730Sstevel@tonic-gate 			parmask = (tmp_icp.ic_data[instr] &
40740Sstevel@tonic-gate 					CH_ICDATA_PRED_ISPCREL) ?
40750Sstevel@tonic-gate 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
40760Sstevel@tonic-gate 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
40770Sstevel@tonic-gate 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
40780Sstevel@tonic-gate 				/*
40790Sstevel@tonic-gate 				 * If this way is the one in which we expected
40800Sstevel@tonic-gate 				 * to find the error, record the way and offset.
40810Sstevel@tonic-gate 				 * Otherwise just log the fact we found another
40820Sstevel@tonic-gate 				 * error.
40830Sstevel@tonic-gate 				 */
40840Sstevel@tonic-gate 				if (flt_index == index) {
40850Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_way = way;
40860Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_off =
40870Sstevel@tonic-gate 								instr * 4;
40880Sstevel@tonic-gate 				}
40890Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_lcnt++;
40900Sstevel@tonic-gate 				continue;
40910Sstevel@tonic-gate 			}
40920Sstevel@tonic-gate 		}
40930Sstevel@tonic-gate 	}
40940Sstevel@tonic-gate }
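
/*
 * Worked example of the ASI address form above (illustrative sizes):
 * with an 8KB set size, index 0x40 in way 1 is read at
 * 2 * (0x40 + 1 * 0x2000) = 0x4080, reflecting the factor-of-two
 * shift that the diagnostic ASI address format requires.
 */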
40950Sstevel@tonic-gate 
40960Sstevel@tonic-gate /*
40970Sstevel@tonic-gate  * Record information related to the source of a Pcache Parity Error.
40980Sstevel@tonic-gate  */
40990Sstevel@tonic-gate static void
41000Sstevel@tonic-gate cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
41010Sstevel@tonic-gate {
41020Sstevel@tonic-gate 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
41030Sstevel@tonic-gate 	int index;
41040Sstevel@tonic-gate 
41050Sstevel@tonic-gate 	/*
41060Sstevel@tonic-gate 	 * Since instruction decode cannot be done at high PIL, just
41070Sstevel@tonic-gate 	 * examine the entire Pcache to check for any parity errors.
41080Sstevel@tonic-gate 	 */
41090Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
41100Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = -1;
41110Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = -1;
41120Sstevel@tonic-gate 	}
41130Sstevel@tonic-gate 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
41140Sstevel@tonic-gate 		cpu_pcache_parity_check(ch_flt, index);
41150Sstevel@tonic-gate }
41160Sstevel@tonic-gate 
41170Sstevel@tonic-gate /*
41180Sstevel@tonic-gate  * Check all ways of the Pcache at a specified index for good parity.
41190Sstevel@tonic-gate  */
41200Sstevel@tonic-gate static void
41210Sstevel@tonic-gate cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
41220Sstevel@tonic-gate {
41230Sstevel@tonic-gate 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
41240Sstevel@tonic-gate 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
41250Sstevel@tonic-gate 	int way, word, pbit, parity_bits;
41260Sstevel@tonic-gate 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
41270Sstevel@tonic-gate 	ch_pc_data_t tmp_pcp;
41280Sstevel@tonic-gate 
41290Sstevel@tonic-gate 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
41300Sstevel@tonic-gate 		/*
41310Sstevel@tonic-gate 		 * Perform diagnostic read.
41320Sstevel@tonic-gate 		 */
41330Sstevel@tonic-gate 		get_pcache_dtag(index + way * pc_set_size,
41340Sstevel@tonic-gate 				(uint64_t *)&tmp_pcp);
41350Sstevel@tonic-gate 		/*
41360Sstevel@tonic-gate 		 * Check data array for odd parity. There are 8 parity
41370Sstevel@tonic-gate 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
41380Sstevel@tonic-gate 		 * of those bits covers exactly 8 bytes of the data
41390Sstevel@tonic-gate 		 * array:
41400Sstevel@tonic-gate 		 *
41410Sstevel@tonic-gate 		 *	parity bit	P$ data bytes covered
41420Sstevel@tonic-gate 		 *	----------	---------------------
41430Sstevel@tonic-gate 		 *	50		63:56
41440Sstevel@tonic-gate 		 *	51		55:48
41450Sstevel@tonic-gate 		 *	52		47:40
41460Sstevel@tonic-gate 		 *	53		39:32
41470Sstevel@tonic-gate 		 *	54		31:24
41480Sstevel@tonic-gate 		 *	55		23:16
41490Sstevel@tonic-gate 		 *	56		15:8
41500Sstevel@tonic-gate 		 *	57		7:0
41510Sstevel@tonic-gate 		 */
41520Sstevel@tonic-gate 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
41530Sstevel@tonic-gate 		for (word = 0; word < pc_data_words; word++) {
41540Sstevel@tonic-gate 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
41550Sstevel@tonic-gate 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
41560Sstevel@tonic-gate 				/*
41570Sstevel@tonic-gate 				 * If this is the first error, log detailed
41580Sstevel@tonic-gate 				 * information about it. Otherwise just record
41590Sstevel@tonic-gate 				 * the fact that we found another error.
41600Sstevel@tonic-gate 				 */
41610Sstevel@tonic-gate 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
41620Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_way = way;
41630Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_cache =
41640Sstevel@tonic-gate 					    CPU_PC_PARITY;
41650Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_off =
41660Sstevel@tonic-gate 					    word * sizeof (uint64_t);
41670Sstevel@tonic-gate 					bcopy(&tmp_pcp, pcp,
41680Sstevel@tonic-gate 							sizeof (ch_pc_data_t));
41690Sstevel@tonic-gate 				}
41700Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_lcnt++;
41710Sstevel@tonic-gate 			}
41720Sstevel@tonic-gate 		}
41730Sstevel@tonic-gate 	}
41740Sstevel@tonic-gate }
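
/*
 * Worked example for the check above (assuming the eight-word,
 * 64-byte P$ line implied by the table): word 0 takes its parity bit
 * from (parity_bits >> 7) & 1 and word 7 from bit 0.  A word is
 * flagged when the stored bit disagrees with the data parity, i.e.
 * when pbit != (popc64(pc_data[word]) & 1).
 */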
41750Sstevel@tonic-gate 
41770Sstevel@tonic-gate /*
41780Sstevel@tonic-gate  * Add L1 Data cache data to the ereport payload.
41790Sstevel@tonic-gate  */
41800Sstevel@tonic-gate static void
41810Sstevel@tonic-gate cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
41820Sstevel@tonic-gate {
41830Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
41840Sstevel@tonic-gate 	ch_dc_data_t *dcp;
41850Sstevel@tonic-gate 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
41860Sstevel@tonic-gate 	uint_t nelem;
41870Sstevel@tonic-gate 	int i, ways_to_check, ways_logged = 0;
41880Sstevel@tonic-gate 
41890Sstevel@tonic-gate 	/*
41900Sstevel@tonic-gate 	 * If this is a D$ fault then there may be multiple
41910Sstevel@tonic-gate 	 * ways captured in the ch_parity_log_t structure.
41920Sstevel@tonic-gate 	 * Otherwise, there will be at most one way captured
41930Sstevel@tonic-gate 	 * in the ch_diag_data_t struct.
41940Sstevel@tonic-gate 	 * Check each way to see if it should be encoded.
41950Sstevel@tonic-gate 	 */
41960Sstevel@tonic-gate 	if (ch_flt->flt_type == CPU_DC_PARITY)
41970Sstevel@tonic-gate 		ways_to_check = CH_DCACHE_NWAY;
41980Sstevel@tonic-gate 	else
41990Sstevel@tonic-gate 		ways_to_check = 1;
42000Sstevel@tonic-gate 	for (i = 0; i < ways_to_check; i++) {
42010Sstevel@tonic-gate 		if (ch_flt->flt_type == CPU_DC_PARITY)
42020Sstevel@tonic-gate 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
42030Sstevel@tonic-gate 		else
42040Sstevel@tonic-gate 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
42050Sstevel@tonic-gate 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
42060Sstevel@tonic-gate 			bcopy(dcp, &dcdata[ways_logged],
42070Sstevel@tonic-gate 				sizeof (ch_dc_data_t));
42080Sstevel@tonic-gate 			ways_logged++;
42090Sstevel@tonic-gate 		}
42100Sstevel@tonic-gate 	}
42110Sstevel@tonic-gate 
42120Sstevel@tonic-gate 	/*
42130Sstevel@tonic-gate 	 * Add the dcache data to the payload.
42140Sstevel@tonic-gate 	 */
42150Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
42160Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
42170Sstevel@tonic-gate 	if (ways_logged != 0) {
42180Sstevel@tonic-gate 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
42190Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
42200Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
42210Sstevel@tonic-gate 	}
42220Sstevel@tonic-gate }
42230Sstevel@tonic-gate 
42240Sstevel@tonic-gate /*
42250Sstevel@tonic-gate  * Add L1 Instruction cache data to the ereport payload.
42260Sstevel@tonic-gate  */
42270Sstevel@tonic-gate static void
42280Sstevel@tonic-gate cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
42290Sstevel@tonic-gate {
42300Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
42310Sstevel@tonic-gate 	ch_ic_data_t *icp;
42320Sstevel@tonic-gate 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
42330Sstevel@tonic-gate 	uint_t nelem;
42340Sstevel@tonic-gate 	int i, ways_to_check, ways_logged = 0;
42350Sstevel@tonic-gate 
42360Sstevel@tonic-gate 	/*
42370Sstevel@tonic-gate 	 * If this is an I$ fault then there may be multiple
42380Sstevel@tonic-gate 	 * ways captured in the ch_parity_log_t structure.
42390Sstevel@tonic-gate 	 * Otherwise, there will be at most one way captured
42400Sstevel@tonic-gate 	 * in the ch_diag_data_t struct.
42410Sstevel@tonic-gate 	 * Check each way to see if it should be encoded.
42420Sstevel@tonic-gate 	 */
42430Sstevel@tonic-gate 	if (ch_flt->flt_type == CPU_IC_PARITY)
42440Sstevel@tonic-gate 		ways_to_check = CH_ICACHE_NWAY;
42450Sstevel@tonic-gate 	else
42460Sstevel@tonic-gate 		ways_to_check = 1;
42470Sstevel@tonic-gate 	for (i = 0; i < ways_to_check; i++) {
42480Sstevel@tonic-gate 		if (ch_flt->flt_type == CPU_IC_PARITY)
42490Sstevel@tonic-gate 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
42500Sstevel@tonic-gate 		else
42510Sstevel@tonic-gate 			icp = &ch_flt->flt_diag_data.chd_ic_data;
42520Sstevel@tonic-gate 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
42530Sstevel@tonic-gate 			bcopy(icp, &icdata[ways_logged],
42540Sstevel@tonic-gate 				sizeof (ch_ic_data_t));
42550Sstevel@tonic-gate 			ways_logged++;
42560Sstevel@tonic-gate 		}
42570Sstevel@tonic-gate 	}
42580Sstevel@tonic-gate 
42590Sstevel@tonic-gate 	/*
42600Sstevel@tonic-gate 	 * Add the icache data to the payload.
42610Sstevel@tonic-gate 	 */
42620Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
42630Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
42640Sstevel@tonic-gate 	if (ways_logged != 0) {
42650Sstevel@tonic-gate 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
42660Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
42670Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
42680Sstevel@tonic-gate 	}
42690Sstevel@tonic-gate }
42700Sstevel@tonic-gate 
42710Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
42720Sstevel@tonic-gate 
42730Sstevel@tonic-gate /*
42740Sstevel@tonic-gate  * Add ecache data to payload.
42750Sstevel@tonic-gate  */
42760Sstevel@tonic-gate static void
42770Sstevel@tonic-gate cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
42780Sstevel@tonic-gate {
42790Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
42800Sstevel@tonic-gate 	ch_ec_data_t *ecp;
42810Sstevel@tonic-gate 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
42820Sstevel@tonic-gate 	uint_t nelem;
42830Sstevel@tonic-gate 	int i, ways_logged = 0;
42840Sstevel@tonic-gate 
42850Sstevel@tonic-gate 	/*
42860Sstevel@tonic-gate 	 * Check each way to see if it should be encoded
42870Sstevel@tonic-gate 	 * and concatenate it into a temporary buffer.
42880Sstevel@tonic-gate 	 */
42890Sstevel@tonic-gate 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
42900Sstevel@tonic-gate 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
42910Sstevel@tonic-gate 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
42920Sstevel@tonic-gate 			bcopy(ecp, &ecdata[ways_logged],
42930Sstevel@tonic-gate 				sizeof (ch_ec_data_t));
42940Sstevel@tonic-gate 			ways_logged++;
42950Sstevel@tonic-gate 		}
42960Sstevel@tonic-gate 	}
42970Sstevel@tonic-gate 
42980Sstevel@tonic-gate 	/*
42990Sstevel@tonic-gate 	 * Panther CPUs have an additional level of cache, so
43000Sstevel@tonic-gate 	 * what we just collected was the L3 (ecache) and not the
43010Sstevel@tonic-gate 	 * L2 cache.
43020Sstevel@tonic-gate 	 */
43030Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
43040Sstevel@tonic-gate 		/*
43050Sstevel@tonic-gate 		 * Add the L3 (ecache) data to the payload.
43060Sstevel@tonic-gate 		 */
43070Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
43080Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
43090Sstevel@tonic-gate 		if (ways_logged != 0) {
43100Sstevel@tonic-gate 			nelem = sizeof (ch_ec_data_t) /
43110Sstevel@tonic-gate 			    sizeof (uint64_t) * ways_logged;
43120Sstevel@tonic-gate 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
43130Sstevel@tonic-gate 			    DATA_TYPE_UINT64_ARRAY, nelem,
43140Sstevel@tonic-gate 			    (uint64_t *)ecdata, NULL);
43150Sstevel@tonic-gate 		}
43160Sstevel@tonic-gate 
43170Sstevel@tonic-gate 		/*
43180Sstevel@tonic-gate 		 * Now collect the L2 cache.
43190Sstevel@tonic-gate 		 */
43200Sstevel@tonic-gate 		ways_logged = 0;
43210Sstevel@tonic-gate 		for (i = 0; i < PN_L2_NWAYS; i++) {
43220Sstevel@tonic-gate 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
43230Sstevel@tonic-gate 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
43240Sstevel@tonic-gate 				bcopy(ecp, &ecdata[ways_logged],
43250Sstevel@tonic-gate 				    sizeof (ch_ec_data_t));
43260Sstevel@tonic-gate 				ways_logged++;
43270Sstevel@tonic-gate 			}
43280Sstevel@tonic-gate 		}
43290Sstevel@tonic-gate 	}
43300Sstevel@tonic-gate 
43310Sstevel@tonic-gate 	/*
43320Sstevel@tonic-gate 	 * Add the L2 cache data to the payload.
43330Sstevel@tonic-gate 	 */
43340Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
43350Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
43360Sstevel@tonic-gate 	if (ways_logged != 0) {
43370Sstevel@tonic-gate 		nelem = sizeof (ch_ec_data_t) /
43380Sstevel@tonic-gate 		    sizeof (uint64_t) * ways_logged;
43390Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
43400Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
43410Sstevel@tonic-gate 	}
43420Sstevel@tonic-gate }
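
/*
 * Illustrative arithmetic for the payload encoding above (a sketch,
 * not compiled): each logged way is flattened into an array of
 * uint64_t.  If, for example, sizeof (ch_ec_data_t) were 64 bytes and
 * two ways were logged, then
 *
 *	nelem = 64 / sizeof (uint64_t) * 2;	-- i.e. 16 elements
 *
 * and the consumer must use the matching *_WAYS member to recover the
 * per-way boundaries.  The 64-byte size is assumed for illustration
 * only; the real size is whatever ch_ec_data_t defines.
 */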
43430Sstevel@tonic-gate 
43440Sstevel@tonic-gate /*
43450Sstevel@tonic-gate  * Encode the data saved in the ch_async_flt_t struct into
43460Sstevel@tonic-gate  * the FM ereport payload.
43470Sstevel@tonic-gate  */
43480Sstevel@tonic-gate static void
43490Sstevel@tonic-gate cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
43500Sstevel@tonic-gate     nvlist_t *resource, int *afar_status, int *synd_status)
43510Sstevel@tonic-gate {
43520Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
43530Sstevel@tonic-gate 	*synd_status = AFLT_STAT_INVALID;
43540Sstevel@tonic-gate 	*afar_status = AFLT_STAT_INVALID;
43550Sstevel@tonic-gate 
43560Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
43570Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
43580Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
43590Sstevel@tonic-gate 	}
43600Sstevel@tonic-gate 
43610Sstevel@tonic-gate 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
43620Sstevel@tonic-gate 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
43630Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
43640Sstevel@tonic-gate 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
43650Sstevel@tonic-gate 	}
43660Sstevel@tonic-gate 
43670Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
43680Sstevel@tonic-gate 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
43690Sstevel@tonic-gate 		    ch_flt->flt_bit);
43700Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
43710Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
43720Sstevel@tonic-gate 	}
43730Sstevel@tonic-gate 
43740Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
43750Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
43760Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
43770Sstevel@tonic-gate 	}
43780Sstevel@tonic-gate 
43790Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
43800Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
43810Sstevel@tonic-gate 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
43820Sstevel@tonic-gate 	}
43830Sstevel@tonic-gate 
43840Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
43850Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
43860Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
43870Sstevel@tonic-gate 	}
43880Sstevel@tonic-gate 
43890Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
43900Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
43910Sstevel@tonic-gate 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
43920Sstevel@tonic-gate 	}
43930Sstevel@tonic-gate 
43940Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
43950Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
43960Sstevel@tonic-gate 		    DATA_TYPE_BOOLEAN_VALUE,
43970Sstevel@tonic-gate 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
43980Sstevel@tonic-gate 	}
43990Sstevel@tonic-gate 
44000Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
44010Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
44020Sstevel@tonic-gate 		    DATA_TYPE_BOOLEAN_VALUE,
44030Sstevel@tonic-gate 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
44040Sstevel@tonic-gate 	}
44050Sstevel@tonic-gate 
44060Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
44070Sstevel@tonic-gate 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
44080Sstevel@tonic-gate 		    ch_flt->afsr_errs, ch_flt->flt_bit);
44090Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
44100Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
44110Sstevel@tonic-gate 	}
44120Sstevel@tonic-gate 
44130Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
44140Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
44150Sstevel@tonic-gate 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
44160Sstevel@tonic-gate 	}
44170Sstevel@tonic-gate 
44180Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
44190Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
44200Sstevel@tonic-gate 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
44210Sstevel@tonic-gate 	}
44220Sstevel@tonic-gate 
44230Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
44240Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
44250Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
44260Sstevel@tonic-gate 	}
44270Sstevel@tonic-gate 
44280Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
44290Sstevel@tonic-gate 		cpu_payload_add_ecache(aflt, payload);
44300Sstevel@tonic-gate 
44310Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
44320Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
44330Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
44340Sstevel@tonic-gate 	}
44350Sstevel@tonic-gate 
44360Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
44370Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
44380Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
44390Sstevel@tonic-gate 	}
44400Sstevel@tonic-gate 
44410Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
44420Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
44430Sstevel@tonic-gate 		    DATA_TYPE_UINT32_ARRAY, 16,
44440Sstevel@tonic-gate 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
44450Sstevel@tonic-gate 	}
44460Sstevel@tonic-gate 
44470Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
44480Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
44490Sstevel@tonic-gate 		cpu_payload_add_dcache(aflt, payload);
44500Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
44510Sstevel@tonic-gate 		cpu_payload_add_icache(aflt, payload);
44520Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
44530Sstevel@tonic-gate 
44540Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
44550Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
44560Sstevel@tonic-gate 		cpu_payload_add_pcache(aflt, payload);
44570Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
44580Sstevel@tonic-gate 		cpu_payload_add_tlb(aflt, payload);
44590Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
44600Sstevel@tonic-gate 	/*
44610Sstevel@tonic-gate 	 * Create the FMRI that goes into the payload
44620Sstevel@tonic-gate 	 * and contains the unum info if necessary.
44630Sstevel@tonic-gate 	 */
44640Sstevel@tonic-gate 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
44650Sstevel@tonic-gate 	    (*afar_status == AFLT_STAT_VALID)) {
44660Sstevel@tonic-gate 		char unum[UNUM_NAMLEN];
44670Sstevel@tonic-gate 		int len;
44680Sstevel@tonic-gate 
44690Sstevel@tonic-gate 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
44700Sstevel@tonic-gate 		    UNUM_NAMLEN, &len) == 0) {
44710Sstevel@tonic-gate 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
44720Sstevel@tonic-gate 			    NULL, unum, NULL);
44730Sstevel@tonic-gate 			fm_payload_set(payload,
44740Sstevel@tonic-gate 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
44750Sstevel@tonic-gate 			    DATA_TYPE_NVLIST, resource, NULL);
44760Sstevel@tonic-gate 		}
44770Sstevel@tonic-gate 	}
44780Sstevel@tonic-gate }
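
/*
 * A note on the flt_status packing used by the COPYFUNCTION and
 * HOWDETECTED members above: the low byte carries the copy-function
 * code and the next byte the detection method, so a consumer-side
 * decode is simply (sketch only, names are illustrative):
 *
 *	uint8_t copyfn = (uint8_t)(aflt->flt_status & 0xff);
 *	uint8_t howdet = (uint8_t)(aflt->flt_status >> 8);
 */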
44790Sstevel@tonic-gate 
44800Sstevel@tonic-gate /*
44810Sstevel@tonic-gate  * Initialize the way info if necessary.
44820Sstevel@tonic-gate  */
44830Sstevel@tonic-gate void
44840Sstevel@tonic-gate cpu_ereport_init(struct async_flt *aflt)
44850Sstevel@tonic-gate {
44860Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
44870Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
44880Sstevel@tonic-gate 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
44890Sstevel@tonic-gate 	int i;
44900Sstevel@tonic-gate 
44910Sstevel@tonic-gate 	/*
44920Sstevel@tonic-gate 	 * Initialize the info in the CPU logout structure.
44930Sstevel@tonic-gate 	 * The I$/D$ way information is not initialized here
44940Sstevel@tonic-gate 	 * since it is captured in the logout assembly code.
44950Sstevel@tonic-gate 	 */
44960Sstevel@tonic-gate 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
44970Sstevel@tonic-gate 		(ecp + i)->ec_way = i;
44980Sstevel@tonic-gate 
44990Sstevel@tonic-gate 	for (i = 0; i < PN_L2_NWAYS; i++)
45000Sstevel@tonic-gate 		(l2p + i)->ec_way = i;
45010Sstevel@tonic-gate }
45020Sstevel@tonic-gate 
45030Sstevel@tonic-gate /*
45040Sstevel@tonic-gate  * Returns whether fault address is valid for this error bit and
45050Sstevel@tonic-gate  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
45060Sstevel@tonic-gate  */
45070Sstevel@tonic-gate int
45080Sstevel@tonic-gate cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
45090Sstevel@tonic-gate {
45100Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
45110Sstevel@tonic-gate 
45120Sstevel@tonic-gate 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
45130Sstevel@tonic-gate 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
45140Sstevel@tonic-gate 	    AFLT_STAT_VALID &&
45150Sstevel@tonic-gate 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
45160Sstevel@tonic-gate }
45170Sstevel@tonic-gate 
45180Sstevel@tonic-gate static void
45190Sstevel@tonic-gate cpu_log_diag_info(ch_async_flt_t *ch_flt)
45200Sstevel@tonic-gate {
45210Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
45220Sstevel@tonic-gate 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
45230Sstevel@tonic-gate 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
45240Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
45250Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
45260Sstevel@tonic-gate 	int i, nway;
45270Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
45280Sstevel@tonic-gate 
45290Sstevel@tonic-gate 	/*
45300Sstevel@tonic-gate 	 * Check whether the captured CPU logout is valid.
45310Sstevel@tonic-gate 	 */
45320Sstevel@tonic-gate 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
45330Sstevel@tonic-gate 	    ch_flt->flt_data_incomplete)
45340Sstevel@tonic-gate 		return;
45350Sstevel@tonic-gate 
45360Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
45370Sstevel@tonic-gate 	nway = cpu_ecache_nway();
45380Sstevel@tonic-gate 	i = cpu_ecache_line_valid(ch_flt);
45390Sstevel@tonic-gate 	if (i == 0 || i > nway) {
45400Sstevel@tonic-gate 		for (i = 0; i < nway; i++)
45410Sstevel@tonic-gate 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
45420Sstevel@tonic-gate 	} else
45430Sstevel@tonic-gate 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
45440Sstevel@tonic-gate #else /* CPU_IMP_ECACHE_ASSOC */
45450Sstevel@tonic-gate 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
45460Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
45470Sstevel@tonic-gate 
45480Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
45490Sstevel@tonic-gate 	pn_cpu_log_diag_l2_info(ch_flt);
45500Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
45510Sstevel@tonic-gate 
45520Sstevel@tonic-gate 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
45530Sstevel@tonic-gate 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
45540Sstevel@tonic-gate 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
45550Sstevel@tonic-gate 	}
45560Sstevel@tonic-gate 
45570Sstevel@tonic-gate 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
45580Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
45590Sstevel@tonic-gate 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
45600Sstevel@tonic-gate 		else
45610Sstevel@tonic-gate 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
45620Sstevel@tonic-gate 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
45630Sstevel@tonic-gate 	}
45640Sstevel@tonic-gate }
45650Sstevel@tonic-gate 
45660Sstevel@tonic-gate /*
45670Sstevel@tonic-gate  * Cheetah ECC calculation.
45680Sstevel@tonic-gate  *
45690Sstevel@tonic-gate  * We only need to do the calculation on the data bits and can ignore check
45700Sstevel@tonic-gate  * bit and Mtag bit terms in the calculation.
45710Sstevel@tonic-gate  */
45720Sstevel@tonic-gate static uint64_t ch_ecc_table[9][2] = {
45730Sstevel@tonic-gate 	/*
45740Sstevel@tonic-gate 	 * low-order 64 bits    high-order 64 bits
45750Sstevel@tonic-gate 	 */
45760Sstevel@tonic-gate 	{ 0x46bffffeccd1177f, 0x488800022100014c },
45770Sstevel@tonic-gate 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
45780Sstevel@tonic-gate 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
45790Sstevel@tonic-gate 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
45800Sstevel@tonic-gate 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
45810Sstevel@tonic-gate 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
45820Sstevel@tonic-gate 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
45830Sstevel@tonic-gate 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
45840Sstevel@tonic-gate 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
45850Sstevel@tonic-gate };
45860Sstevel@tonic-gate 
45870Sstevel@tonic-gate /*
45880Sstevel@tonic-gate  * 64-bit population count, using the well-known popcount trick of
45890Sstevel@tonic-gate  * clearing the lowest set bit until none remain.  We could use the
45900Sstevel@tonic-gate  * UltraSPARC V9 POPC instruction, but some CPUs, including Cheetah+
45910Sstevel@tonic-gate  * and Jaguar, do not support that instruction.
45920Sstevel@tonic-gate  */
45930Sstevel@tonic-gate int
45940Sstevel@tonic-gate popc64(uint64_t val)
45950Sstevel@tonic-gate {
45960Sstevel@tonic-gate 	int cnt;
45970Sstevel@tonic-gate 
45980Sstevel@tonic-gate 	for (cnt = 0; val != 0; val &= val - 1)
45990Sstevel@tonic-gate 		cnt++;
46000Sstevel@tonic-gate 	return (cnt);
46010Sstevel@tonic-gate }
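
/*
 * Worked example: 0xf0 has four bits set, and each "val &= val - 1"
 * iteration clears the lowest set bit:
 *
 *	0xf0 -> 0xe0 -> 0xc0 -> 0x80 -> 0x0
 *
 * so the loop runs four times and popc64(0xf0) returns 4.
 */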
46020Sstevel@tonic-gate 
46030Sstevel@tonic-gate /*
46040Sstevel@tonic-gate  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
46050Sstevel@tonic-gate  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
46060Sstevel@tonic-gate  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
46070Sstevel@tonic-gate  * instead of doing all the xor's.
46080Sstevel@tonic-gate  */
46090Sstevel@tonic-gate uint32_t
46100Sstevel@tonic-gate us3_gen_ecc(uint64_t data_low, uint64_t data_high)
46110Sstevel@tonic-gate {
46120Sstevel@tonic-gate 	int bitno, s;
46130Sstevel@tonic-gate 	int synd = 0;
46140Sstevel@tonic-gate 
46150Sstevel@tonic-gate 	for (bitno = 0; bitno < 9; bitno++) {
46160Sstevel@tonic-gate 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
46170Sstevel@tonic-gate 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
46180Sstevel@tonic-gate 		synd |= (s << bitno);
46190Sstevel@tonic-gate 	}
46200Sstevel@tonic-gate 	return (synd);
46220Sstevel@tonic-gate }
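
/*
 * Two sanity-check properties follow directly from the table-driven
 * parity computation above (illustrative, not compiled):
 *
 *	us3_gen_ecc(0, 0) == 0, since popc64(0) is 0 for every row; and
 *	the computation is linear over xor, i.e.
 *	us3_gen_ecc(a ^ x, b ^ y) ==
 *	    (us3_gen_ecc(a, b) ^ us3_gen_ecc(x, y)),
 *
 * so flipping a single data bit toggles exactly those check bits whose
 * table rows cover that bit.
 */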
46230Sstevel@tonic-gate 
46240Sstevel@tonic-gate /*
46250Sstevel@tonic-gate  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
46260Sstevel@tonic-gate  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
46270Sstevel@tonic-gate  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
46280Sstevel@tonic-gate  */
46290Sstevel@tonic-gate static void
46300Sstevel@tonic-gate cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
46310Sstevel@tonic-gate     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
46320Sstevel@tonic-gate {
46330Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
46340Sstevel@tonic-gate 
46350Sstevel@tonic-gate 	if (reason &&
46360Sstevel@tonic-gate 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
46370Sstevel@tonic-gate 		(void) strcat(reason, eccp->ec_reason);
46380Sstevel@tonic-gate 	}
46390Sstevel@tonic-gate 
46400Sstevel@tonic-gate 	ch_flt->flt_bit = eccp->ec_afsr_bit;
46410Sstevel@tonic-gate 	ch_flt->flt_type = eccp->ec_flt_type;
46420Sstevel@tonic-gate 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
46430Sstevel@tonic-gate 		ch_flt->flt_diag_data = *cdp;
46440Sstevel@tonic-gate 	else
46450Sstevel@tonic-gate 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
46460Sstevel@tonic-gate 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
46470Sstevel@tonic-gate 
46480Sstevel@tonic-gate 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
46490Sstevel@tonic-gate 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
46500Sstevel@tonic-gate 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
46510Sstevel@tonic-gate 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
46520Sstevel@tonic-gate 	else
46530Sstevel@tonic-gate 		aflt->flt_synd = 0;
46540Sstevel@tonic-gate 
46550Sstevel@tonic-gate 	aflt->flt_payload = eccp->ec_err_payload;
46560Sstevel@tonic-gate 
46570Sstevel@tonic-gate 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
46580Sstevel@tonic-gate 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
46590Sstevel@tonic-gate 		cpu_errorq_dispatch(eccp->ec_err_class,
46600Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
46610Sstevel@tonic-gate 		    aflt->flt_panic);
46620Sstevel@tonic-gate 	else
46630Sstevel@tonic-gate 		cpu_errorq_dispatch(eccp->ec_err_class,
46640Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
46650Sstevel@tonic-gate 		    aflt->flt_panic);
46660Sstevel@tonic-gate }
46670Sstevel@tonic-gate 
46680Sstevel@tonic-gate /*
46690Sstevel@tonic-gate  * Queue events on the async event queue, one event per error bit.  First we
46700Sstevel@tonic-gate  * queue the events that we "expect" for the given trap, then we queue events
46710Sstevel@tonic-gate  * that we may not expect.  Return number of events queued.
46720Sstevel@tonic-gate  */
46730Sstevel@tonic-gate int
46740Sstevel@tonic-gate cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
46750Sstevel@tonic-gate     ch_cpu_logout_t *clop)
46760Sstevel@tonic-gate {
46770Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
46780Sstevel@tonic-gate 	ecc_type_to_info_t *eccp;
46790Sstevel@tonic-gate 	int nevents = 0;
46800Sstevel@tonic-gate 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
46810Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
46820Sstevel@tonic-gate 	uint64_t orig_t_afsr_errs;
46830Sstevel@tonic-gate #endif
46840Sstevel@tonic-gate 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
46850Sstevel@tonic-gate 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
46860Sstevel@tonic-gate 	ch_diag_data_t *cdp = NULL;
46870Sstevel@tonic-gate 
46880Sstevel@tonic-gate 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
46890Sstevel@tonic-gate 
46900Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
46910Sstevel@tonic-gate 	orig_t_afsr_errs = t_afsr_errs;
46920Sstevel@tonic-gate 
46930Sstevel@tonic-gate 	/*
46940Sstevel@tonic-gate 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
46950Sstevel@tonic-gate 	 */
46960Sstevel@tonic-gate 	if (clop != NULL) {
46970Sstevel@tonic-gate 		/*
46980Sstevel@tonic-gate 		 * Set the AFSR and AFAR fields to the shadow registers.  The
46990Sstevel@tonic-gate 		 * flt_addr and flt_stat fields will be reset to the primaries
47000Sstevel@tonic-gate 		 * below, but the sdw_addr and sdw_stat will stay as the
47010Sstevel@tonic-gate 		 * secondaries.
47020Sstevel@tonic-gate 		 */
47030Sstevel@tonic-gate 		cdp = &clop->clo_sdw_data;
47040Sstevel@tonic-gate 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
47050Sstevel@tonic-gate 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
47060Sstevel@tonic-gate 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
47070Sstevel@tonic-gate 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
47080Sstevel@tonic-gate 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
47090Sstevel@tonic-gate 
47100Sstevel@tonic-gate 		/*
47110Sstevel@tonic-gate 		 * If the primary and shadow AFSR differ, tag the shadow as
47120Sstevel@tonic-gate 		 * the first fault.
47130Sstevel@tonic-gate 		 */
47140Sstevel@tonic-gate 		if ((primary_afar != cdp->chd_afar) ||
47150Sstevel@tonic-gate 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
47160Sstevel@tonic-gate 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
47170Sstevel@tonic-gate 		}
47180Sstevel@tonic-gate 
47190Sstevel@tonic-gate 		/*
47200Sstevel@tonic-gate 		 * Check AFSR bits as well as AFSR_EXT bits in order of
47210Sstevel@tonic-gate 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
47220Sstevel@tonic-gate 		 * is expected to be zero for those CPUs which do not have
47230Sstevel@tonic-gate 		 * an AFSR_EXT register.
47240Sstevel@tonic-gate 		 */
47250Sstevel@tonic-gate 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
47260Sstevel@tonic-gate 			if ((eccp->ec_afsr_bit &
47270Sstevel@tonic-gate 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
47280Sstevel@tonic-gate 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
47290Sstevel@tonic-gate 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
47300Sstevel@tonic-gate 				cdp = NULL;
47310Sstevel@tonic-gate 				t_afsr_errs &= ~eccp->ec_afsr_bit;
47320Sstevel@tonic-gate 				nevents++;
47330Sstevel@tonic-gate 			}
47340Sstevel@tonic-gate 		}
47350Sstevel@tonic-gate 
47360Sstevel@tonic-gate 		/*
47370Sstevel@tonic-gate 		 * If the ME bit is on in the primary AFSR turn all the
47380Sstevel@tonic-gate 		 * error bits on again that may set the ME bit to make
47390Sstevel@tonic-gate 		 * sure we see the ME AFSR error logs.
47400Sstevel@tonic-gate 		 */
47410Sstevel@tonic-gate 		if ((primary_afsr & C_AFSR_ME) != 0)
47420Sstevel@tonic-gate 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
47430Sstevel@tonic-gate 	}
47440Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
47450Sstevel@tonic-gate 
47460Sstevel@tonic-gate 	if (clop != NULL)
47470Sstevel@tonic-gate 		cdp = &clop->clo_data;
47480Sstevel@tonic-gate 
47490Sstevel@tonic-gate 	/*
47500Sstevel@tonic-gate 	 * Queue expected errors: the error bit and fault type must both
47510Sstevel@tonic-gate 	 * match an entry in the ecc_type_to_info table.
47520Sstevel@tonic-gate 	 */
47530Sstevel@tonic-gate 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
47540Sstevel@tonic-gate 	    eccp++) {
47550Sstevel@tonic-gate 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
47560Sstevel@tonic-gate 		    (eccp->ec_flags & aflt->flt_status) != 0) {
47570Sstevel@tonic-gate #if defined(SERRANO)
47580Sstevel@tonic-gate 			/*
47590Sstevel@tonic-gate 			 * For FRC/FRU errors on Serrano the afar2 captures
47600Sstevel@tonic-gate 			 * the address and the associated data is
47610Sstevel@tonic-gate 			 * in the shadow logout area.
47620Sstevel@tonic-gate 			 */
47630Sstevel@tonic-gate 			if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
47640Sstevel@tonic-gate 				if (clop != NULL)
47650Sstevel@tonic-gate 					cdp = &clop->clo_sdw_data;
47660Sstevel@tonic-gate 				aflt->flt_addr = ch_flt->afar2;
47670Sstevel@tonic-gate 			} else {
47680Sstevel@tonic-gate 				if (clop != NULL)
47690Sstevel@tonic-gate 					cdp = &clop->clo_data;
47700Sstevel@tonic-gate 				aflt->flt_addr = primary_afar;
47710Sstevel@tonic-gate 			}
47720Sstevel@tonic-gate #else	/* SERRANO */
47730Sstevel@tonic-gate 			aflt->flt_addr = primary_afar;
47740Sstevel@tonic-gate #endif	/* SERRANO */
47750Sstevel@tonic-gate 			aflt->flt_stat = primary_afsr;
47760Sstevel@tonic-gate 			ch_flt->afsr_ext = primary_afsr_ext;
47770Sstevel@tonic-gate 			ch_flt->afsr_errs = primary_afsr_errs;
47780Sstevel@tonic-gate 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
47790Sstevel@tonic-gate 			cdp = NULL;
47800Sstevel@tonic-gate 			t_afsr_errs &= ~eccp->ec_afsr_bit;
47810Sstevel@tonic-gate 			nevents++;
47820Sstevel@tonic-gate 		}
47830Sstevel@tonic-gate 	}
47840Sstevel@tonic-gate 
47850Sstevel@tonic-gate 	/*
47860Sstevel@tonic-gate 	 * Queue unexpected errors: only the error bit needs to match.
47870Sstevel@tonic-gate 	 */
47880Sstevel@tonic-gate 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
47890Sstevel@tonic-gate 	    eccp++) {
47900Sstevel@tonic-gate 		if (eccp->ec_afsr_bit & t_afsr_errs) {
47910Sstevel@tonic-gate #if defined(SERRANO)
47920Sstevel@tonic-gate 			/*
47930Sstevel@tonic-gate 			 * For FRC/FRU errors on Serrano the afar2 captures
47940Sstevel@tonic-gate 			 * the address and the associated data is
47950Sstevel@tonic-gate 			 * in the shadow logout area.
47960Sstevel@tonic-gate 			 */
47970Sstevel@tonic-gate 			if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
47980Sstevel@tonic-gate 				if (clop != NULL)
47990Sstevel@tonic-gate 					cdp = &clop->clo_sdw_data;
48000Sstevel@tonic-gate 				aflt->flt_addr = ch_flt->afar2;
48010Sstevel@tonic-gate 			} else {
48020Sstevel@tonic-gate 				if (clop != NULL)
48030Sstevel@tonic-gate 					cdp = &clop->clo_data;
48040Sstevel@tonic-gate 				aflt->flt_addr = primary_afar;
48050Sstevel@tonic-gate 			}
48060Sstevel@tonic-gate #else	/* SERRANO */
48070Sstevel@tonic-gate 			aflt->flt_addr = primary_afar;
48080Sstevel@tonic-gate #endif	/* SERRANO */
48090Sstevel@tonic-gate 			aflt->flt_stat = primary_afsr;
48100Sstevel@tonic-gate 			ch_flt->afsr_ext = primary_afsr_ext;
48110Sstevel@tonic-gate 			ch_flt->afsr_errs = primary_afsr_errs;
48120Sstevel@tonic-gate 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
48130Sstevel@tonic-gate 			cdp = NULL;
48140Sstevel@tonic-gate 			t_afsr_errs &= ~eccp->ec_afsr_bit;
48150Sstevel@tonic-gate 			nevents++;
48160Sstevel@tonic-gate 		}
48170Sstevel@tonic-gate 	}
48180Sstevel@tonic-gate 	return (nevents);
48190Sstevel@tonic-gate }
48200Sstevel@tonic-gate 
48210Sstevel@tonic-gate /*
48220Sstevel@tonic-gate  * Return trap type number.
48230Sstevel@tonic-gate  */
48240Sstevel@tonic-gate uint8_t
48250Sstevel@tonic-gate flt_to_trap_type(struct async_flt *aflt)
48260Sstevel@tonic-gate {
48270Sstevel@tonic-gate 	if (aflt->flt_status & ECC_I_TRAP)
48280Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_I);
48290Sstevel@tonic-gate 	if (aflt->flt_status & ECC_D_TRAP)
48300Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_D);
48310Sstevel@tonic-gate 	if (aflt->flt_status & ECC_F_TRAP)
48320Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_F);
48330Sstevel@tonic-gate 	if (aflt->flt_status & ECC_C_TRAP)
48340Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_C);
48350Sstevel@tonic-gate 	if (aflt->flt_status & ECC_DP_TRAP)
48360Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_DP);
48370Sstevel@tonic-gate 	if (aflt->flt_status & ECC_IP_TRAP)
48380Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_IP);
48390Sstevel@tonic-gate 	if (aflt->flt_status & ECC_ITLB_TRAP)
48400Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_ITLB);
48410Sstevel@tonic-gate 	if (aflt->flt_status & ECC_DTLB_TRAP)
48420Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_DTLB);
48430Sstevel@tonic-gate 	return (TRAP_TYPE_UNKNOWN);
48440Sstevel@tonic-gate }
48450Sstevel@tonic-gate 
48460Sstevel@tonic-gate /*
48470Sstevel@tonic-gate  * Decide an error type based on detector and leaky/partner tests.
48480Sstevel@tonic-gate  * The following array is used for quick translation - it must
48490Sstevel@tonic-gate  * stay in sync with ce_dispact_t.
48500Sstevel@tonic-gate  */
48510Sstevel@tonic-gate 
48520Sstevel@tonic-gate static char *cetypes[] = {
48530Sstevel@tonic-gate 	CE_DISP_DESC_U,
48540Sstevel@tonic-gate 	CE_DISP_DESC_I,
48550Sstevel@tonic-gate 	CE_DISP_DESC_PP,
48560Sstevel@tonic-gate 	CE_DISP_DESC_P,
48570Sstevel@tonic-gate 	CE_DISP_DESC_L,
48580Sstevel@tonic-gate 	CE_DISP_DESC_PS,
48590Sstevel@tonic-gate 	CE_DISP_DESC_S
48600Sstevel@tonic-gate };
48610Sstevel@tonic-gate 
48620Sstevel@tonic-gate char *
48630Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt)
48640Sstevel@tonic-gate {
48650Sstevel@tonic-gate 	ce_dispact_t dispact, disp;
48660Sstevel@tonic-gate 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
48670Sstevel@tonic-gate 
48680Sstevel@tonic-gate 	/*
48690Sstevel@tonic-gate 	 * The memory payload bundle is shared by some events that do
48700Sstevel@tonic-gate 	 * not perform any classification.  For those flt_disp will be
48710Sstevel@tonic-gate 	 * 0 and we will return "unknown".
48720Sstevel@tonic-gate 	 */
48730Sstevel@tonic-gate 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
48740Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
48750Sstevel@tonic-gate 
48760Sstevel@tonic-gate 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
48770Sstevel@tonic-gate 
48780Sstevel@tonic-gate 	/*
48790Sstevel@tonic-gate 	 * It is also possible that no scrub/classification was performed
48800Sstevel@tonic-gate 	 * by the detector, for instance where a disrupting error was logged
48810Sstevel@tonic-gate 	 * in the AFSR while CEEN was off in cpu_deferred_error.
48820Sstevel@tonic-gate 	 */
48830Sstevel@tonic-gate 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
48840Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
48850Sstevel@tonic-gate 
48860Sstevel@tonic-gate 	/*
48870Sstevel@tonic-gate 	 * Lookup type in initial classification/action table
48880Sstevel@tonic-gate 	 */
48890Sstevel@tonic-gate 	dispact = CE_DISPACT(ce_disp_table,
48900Sstevel@tonic-gate 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
48910Sstevel@tonic-gate 	    CE_XDIAG_STATE(dtcrinfo),
48920Sstevel@tonic-gate 	    CE_XDIAG_CE1SEEN(dtcrinfo),
48930Sstevel@tonic-gate 	    CE_XDIAG_CE2SEEN(dtcrinfo));
48940Sstevel@tonic-gate 
48950Sstevel@tonic-gate 	/*
48960Sstevel@tonic-gate 	 * A bad lookup is not something to panic production systems for.
48970Sstevel@tonic-gate 	 */
48980Sstevel@tonic-gate 	ASSERT(dispact != CE_DISP_BAD);
48990Sstevel@tonic-gate 	if (dispact == CE_DISP_BAD)
49000Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
49010Sstevel@tonic-gate 
49020Sstevel@tonic-gate 	disp = CE_DISP(dispact);
49030Sstevel@tonic-gate 
49040Sstevel@tonic-gate 	switch (disp) {
49050Sstevel@tonic-gate 	case CE_DISP_UNKNOWN:
49060Sstevel@tonic-gate 	case CE_DISP_INTERMITTENT:
49070Sstevel@tonic-gate 		break;
49080Sstevel@tonic-gate 
49090Sstevel@tonic-gate 	case CE_DISP_POSS_PERS:
49100Sstevel@tonic-gate 		/*
49110Sstevel@tonic-gate 		 * "Possible persistent" errors to which we have applied a valid
49120Sstevel@tonic-gate 		 * leaky test can be separated into "persistent" or "leaky".
49130Sstevel@tonic-gate 		 */
49140Sstevel@tonic-gate 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
49150Sstevel@tonic-gate 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
49160Sstevel@tonic-gate 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
49170Sstevel@tonic-gate 			    CE_XDIAG_CE2SEEN(lkyinfo))
49180Sstevel@tonic-gate 				disp = CE_DISP_LEAKY;
49190Sstevel@tonic-gate 			else
49200Sstevel@tonic-gate 				disp = CE_DISP_PERS;
49210Sstevel@tonic-gate 		}
49220Sstevel@tonic-gate 		break;
49230Sstevel@tonic-gate 
49240Sstevel@tonic-gate 	case CE_DISP_POSS_STICKY:
49250Sstevel@tonic-gate 		/*
49260Sstevel@tonic-gate 		 * Promote "possible sticky" results that have been
49270Sstevel@tonic-gate 		 * confirmed by a partner test to "sticky".  Unconfirmed
49280Sstevel@tonic-gate 		 * "possible sticky" events are left at that status - we do not
49290Sstevel@tonic-gate 		 * guess at any bad reader/writer etc status here.
49300Sstevel@tonic-gate 		 */
49310Sstevel@tonic-gate 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
49320Sstevel@tonic-gate 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
49330Sstevel@tonic-gate 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
49340Sstevel@tonic-gate 			disp = CE_DISP_STICKY;
49350Sstevel@tonic-gate 
49360Sstevel@tonic-gate 		/*
49370Sstevel@tonic-gate 		 * Promote "possible sticky" results on a uniprocessor
49380Sstevel@tonic-gate 		 * to "sticky"
49390Sstevel@tonic-gate 		 */
49400Sstevel@tonic-gate 		if (disp == CE_DISP_POSS_STICKY &&
49410Sstevel@tonic-gate 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
49420Sstevel@tonic-gate 			disp = CE_DISP_STICKY;
49430Sstevel@tonic-gate 		break;
49440Sstevel@tonic-gate 
49450Sstevel@tonic-gate 	default:
49460Sstevel@tonic-gate 		disp = CE_DISP_UNKNOWN;
49470Sstevel@tonic-gate 		break;
49480Sstevel@tonic-gate 	}
49490Sstevel@tonic-gate 
49500Sstevel@tonic-gate 	return (cetypes[disp]);
49510Sstevel@tonic-gate }
49520Sstevel@tonic-gate 
49530Sstevel@tonic-gate /*
49540Sstevel@tonic-gate  * Given the entire afsr, the specific bit to check and a prioritized list of
49550Sstevel@tonic-gate  * error bits, determine the validity of the various overwrite priority
49560Sstevel@tonic-gate  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
49570Sstevel@tonic-gate  * different overwrite priorities.
49580Sstevel@tonic-gate  *
49590Sstevel@tonic-gate  * Given a specific afsr error bit and the entire afsr, there are three cases:
49600Sstevel@tonic-gate  *   INVALID:	The specified bit is lower overwrite priority than some other
49610Sstevel@tonic-gate  *		error bit which is on in the afsr (or IVU/IVC).
49620Sstevel@tonic-gate  *   VALID:	The specified bit is higher priority than all other error bits
49630Sstevel@tonic-gate  *		which are on in the afsr.
49640Sstevel@tonic-gate  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
49650Sstevel@tonic-gate  *		bit is on in the afsr.
49660Sstevel@tonic-gate  */
49670Sstevel@tonic-gate int
49680Sstevel@tonic-gate afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
49690Sstevel@tonic-gate {
49700Sstevel@tonic-gate 	uint64_t afsr_ow;
49710Sstevel@tonic-gate 
49720Sstevel@tonic-gate 	while ((afsr_ow = *ow_bits++) != 0) {
49730Sstevel@tonic-gate 		/*
49740Sstevel@tonic-gate 		 * If bit is in the priority class, check to see if another
49750Sstevel@tonic-gate 		 * bit in the same class is on => ambiguous.  Otherwise,
49760Sstevel@tonic-gate 		 * the value is valid.  If the bit is not on at this priority
49770Sstevel@tonic-gate 		 * class, but a higher priority bit is on, then the value is
49780Sstevel@tonic-gate 		 * invalid.
49790Sstevel@tonic-gate 		 */
49800Sstevel@tonic-gate 		if (afsr_ow & afsr_bit) {
49810Sstevel@tonic-gate 			/*
49820Sstevel@tonic-gate 			 * If equal pri bit is on, ambiguous.
49830Sstevel@tonic-gate 			 */
49840Sstevel@tonic-gate 			if (afsr & (afsr_ow & ~afsr_bit))
49850Sstevel@tonic-gate 				return (AFLT_STAT_AMBIGUOUS);
49860Sstevel@tonic-gate 			return (AFLT_STAT_VALID);
49870Sstevel@tonic-gate 		} else if (afsr & afsr_ow)
49880Sstevel@tonic-gate 			break;
49890Sstevel@tonic-gate 	}
49900Sstevel@tonic-gate 
49910Sstevel@tonic-gate 	/*
49920Sstevel@tonic-gate 	 * We didn't find a match or a higher priority bit was on.  Not
49930Sstevel@tonic-gate 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
49940Sstevel@tonic-gate 	 */
49950Sstevel@tonic-gate 	return (AFLT_STAT_INVALID);
49960Sstevel@tonic-gate }
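
/*
 * A hypothetical walk-through of the loop above (BITA/BITB/BITC are
 * illustrative names, not real AFSR bits): with a priority list
 * ow_bits[] = { BITA | BITB, BITC, 0 }, highest class first, and an
 * afsr with both BITA and BITB set,
 *
 *	afsr_to_overw_status(afsr, BITA, ow_bits) == AFLT_STAT_AMBIGUOUS
 *
 * because an equal-priority bit (BITB) is also on.  With only BITA set
 * the result would be VALID, and asking about BITC while BITA is set
 * would be INVALID because a higher-priority class already matched.
 */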
49970Sstevel@tonic-gate 
49980Sstevel@tonic-gate static int
49990Sstevel@tonic-gate afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
50000Sstevel@tonic-gate {
50010Sstevel@tonic-gate #if defined(SERRANO)
50020Sstevel@tonic-gate 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
50030Sstevel@tonic-gate 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
50040Sstevel@tonic-gate 	else
50050Sstevel@tonic-gate #endif	/* SERRANO */
50060Sstevel@tonic-gate 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
50070Sstevel@tonic-gate }
50080Sstevel@tonic-gate 
50090Sstevel@tonic-gate static int
50100Sstevel@tonic-gate afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
50110Sstevel@tonic-gate {
50120Sstevel@tonic-gate 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
50130Sstevel@tonic-gate }
50140Sstevel@tonic-gate 
50150Sstevel@tonic-gate static int
50160Sstevel@tonic-gate afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
50170Sstevel@tonic-gate {
50180Sstevel@tonic-gate 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
50190Sstevel@tonic-gate }
50200Sstevel@tonic-gate 
50210Sstevel@tonic-gate static int
50220Sstevel@tonic-gate afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
50230Sstevel@tonic-gate {
50240Sstevel@tonic-gate #ifdef lint
50250Sstevel@tonic-gate 	cpuid = cpuid;
50260Sstevel@tonic-gate #endif
50270Sstevel@tonic-gate 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
50280Sstevel@tonic-gate 		return (afsr_to_msynd_status(afsr, afsr_bit));
50290Sstevel@tonic-gate 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
50300Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
50310Sstevel@tonic-gate 		/*
50320Sstevel@tonic-gate 		 * The E_SYND overwrite policy is slightly different
50330Sstevel@tonic-gate 		 * for Panther CPUs.
50340Sstevel@tonic-gate 		 */
50350Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[cpuid].implementation))
50360Sstevel@tonic-gate 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
50370Sstevel@tonic-gate 		else
50380Sstevel@tonic-gate 			return (afsr_to_esynd_status(afsr, afsr_bit));
50390Sstevel@tonic-gate #else /* CHEETAH_PLUS */
50400Sstevel@tonic-gate 		return (afsr_to_esynd_status(afsr, afsr_bit));
50410Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
50420Sstevel@tonic-gate 	} else {
50430Sstevel@tonic-gate 		return (AFLT_STAT_INVALID);
50440Sstevel@tonic-gate 	}
50450Sstevel@tonic-gate }
50460Sstevel@tonic-gate 
50470Sstevel@tonic-gate /*
50480Sstevel@tonic-gate  * Slave CPU stick synchronization.
50490Sstevel@tonic-gate  */
50500Sstevel@tonic-gate void
50510Sstevel@tonic-gate sticksync_slave(void)
50520Sstevel@tonic-gate {
50530Sstevel@tonic-gate 	int		i;
50540Sstevel@tonic-gate 	int		tries = 0;
50550Sstevel@tonic-gate 	int64_t		tskew;
50560Sstevel@tonic-gate 	int64_t		av_tskew;
50570Sstevel@tonic-gate 
50580Sstevel@tonic-gate 	kpreempt_disable();
50590Sstevel@tonic-gate 	/* wait for the master side */
50600Sstevel@tonic-gate 	while (stick_sync_cmd != SLAVE_START)
50610Sstevel@tonic-gate 		;
50620Sstevel@tonic-gate 	/*
50630Sstevel@tonic-gate 	 * Synchronization should take only a few tries at most; but in the
50640Sstevel@tonic-gate 	 * odd case where the cpu isn't cooperating we'll keep trying.  A cpu
50650Sstevel@tonic-gate 	 * without its stick synchronized wouldn't be a good citizen.
50660Sstevel@tonic-gate 	 */
50670Sstevel@tonic-gate 	while (slave_done == 0) {
50680Sstevel@tonic-gate 		/*
50690Sstevel@tonic-gate 		 * Time skew calculation.
50700Sstevel@tonic-gate 		 */
50710Sstevel@tonic-gate 		av_tskew = tskew = 0;
50720Sstevel@tonic-gate 
50730Sstevel@tonic-gate 		for (i = 0; i < stick_iter; i++) {
50740Sstevel@tonic-gate 			/* make location hot */
50750Sstevel@tonic-gate 			timestamp[EV_A_START] = 0;
50760Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_A_START]);
50770Sstevel@tonic-gate 
50780Sstevel@tonic-gate 			/* tell the master we're ready */
50790Sstevel@tonic-gate 			stick_sync_cmd = MASTER_START;
50800Sstevel@tonic-gate 
50810Sstevel@tonic-gate 			/* and wait */
50820Sstevel@tonic-gate 			while (stick_sync_cmd != SLAVE_CONT)
50830Sstevel@tonic-gate 				;
50840Sstevel@tonic-gate 			/* Event B end */
50850Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_B_END]);
50860Sstevel@tonic-gate 
50870Sstevel@tonic-gate 			/* calculate time skew */
50880Sstevel@tonic-gate 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
50890Sstevel@tonic-gate 			    - (timestamp[EV_A_END] -
50900Sstevel@tonic-gate 			    timestamp[EV_A_START])) / 2;
50910Sstevel@tonic-gate 
50920Sstevel@tonic-gate 			/* keep running count */
50930Sstevel@tonic-gate 			av_tskew += tskew;
50940Sstevel@tonic-gate 		} /* for */
50950Sstevel@tonic-gate 
50960Sstevel@tonic-gate 		/*
50970Sstevel@tonic-gate 		 * Adjust stick for time skew if not within the max allowed;
50980Sstevel@tonic-gate 		 * otherwise we're all done.
50990Sstevel@tonic-gate 		 */
51000Sstevel@tonic-gate 		if (stick_iter != 0)
51010Sstevel@tonic-gate 			av_tskew = av_tskew / stick_iter;
51020Sstevel@tonic-gate 		if (ABS(av_tskew) > stick_tsk) {
51030Sstevel@tonic-gate 			/*
51040Sstevel@tonic-gate 			 * If the skew is 1 (the slave's STICK register
51050Sstevel@tonic-gate 			 * is 1 STICK ahead of the master's), stick_adj
51060Sstevel@tonic-gate 			 * could fail to adjust the slave's STICK register
51070Sstevel@tonic-gate 			 * if the STICK read on the slave happens to
51080Sstevel@tonic-gate 			 * align with the increment of the STICK.
51090Sstevel@tonic-gate 			 * Therefore, we increment the skew to 2.
51100Sstevel@tonic-gate 			 */
51110Sstevel@tonic-gate 			if (av_tskew == 1)
51120Sstevel@tonic-gate 				av_tskew++;
51130Sstevel@tonic-gate 			stick_adj(-av_tskew);
51140Sstevel@tonic-gate 		} else
51150Sstevel@tonic-gate 			slave_done = 1;
51160Sstevel@tonic-gate #ifdef DEBUG
51170Sstevel@tonic-gate 		if (tries < DSYNC_ATTEMPTS)
51180Sstevel@tonic-gate 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
51190Sstevel@tonic-gate 			    av_tskew;
51200Sstevel@tonic-gate 		++tries;
51210Sstevel@tonic-gate #endif /* DEBUG */
51220Sstevel@tonic-gate #ifdef lint
51230Sstevel@tonic-gate 		tries = tries;
51240Sstevel@tonic-gate #endif
51250Sstevel@tonic-gate 
51260Sstevel@tonic-gate 	} /* while */
51270Sstevel@tonic-gate 
51280Sstevel@tonic-gate 	/* allow the master to finish */
51290Sstevel@tonic-gate 	stick_sync_cmd = EVENT_NULL;
51300Sstevel@tonic-gate 	kpreempt_enable();
51310Sstevel@tonic-gate }
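
/*
 * Why the per-iteration skew formula above works (a sketch, assuming
 * the slave's stick leads the master's by s and the two one-way
 * signalling latencies dA (slave->master) and dB (master->slave) are
 * roughly symmetric):
 *
 *	EV_A_END - EV_A_START = dA - s	(slave stamps start, master end)
 *	EV_B_END - EV_B_START = dB + s	(master stamps start, slave end)
 *
 * so ((B) - (A)) / 2 = s + (dB - dA) / 2, which is the tskew computed
 * above and equals s exactly when dA == dB.  Averaging over stick_iter
 * iterations smooths out latency jitter, and stick_adj(-av_tskew)
 * removes the estimated lead.
 */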
51320Sstevel@tonic-gate 
51330Sstevel@tonic-gate /*
51340Sstevel@tonic-gate  * Master CPU side of stick synchronization.
51350Sstevel@tonic-gate  *  - timestamp end of Event A
51360Sstevel@tonic-gate  *  - timestamp beginning of Event B
51370Sstevel@tonic-gate  */
51380Sstevel@tonic-gate void
51390Sstevel@tonic-gate sticksync_master(void)
51400Sstevel@tonic-gate {
51410Sstevel@tonic-gate 	int		i;
51420Sstevel@tonic-gate 
51430Sstevel@tonic-gate 	kpreempt_disable();
51440Sstevel@tonic-gate 	/* tell the slave we've started */
51450Sstevel@tonic-gate 	slave_done = 0;
51460Sstevel@tonic-gate 	stick_sync_cmd = SLAVE_START;
51470Sstevel@tonic-gate 
51480Sstevel@tonic-gate 	while (slave_done == 0) {
51490Sstevel@tonic-gate 		for (i = 0; i < stick_iter; i++) {
51500Sstevel@tonic-gate 			/* wait for the slave */
51510Sstevel@tonic-gate 			while (stick_sync_cmd != MASTER_START)
51520Sstevel@tonic-gate 				;
51530Sstevel@tonic-gate 			/* Event A end */
51540Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_A_END]);
51550Sstevel@tonic-gate 
51560Sstevel@tonic-gate 			/* make location hot */
51570Sstevel@tonic-gate 			timestamp[EV_B_START] = 0;
51580Sstevel@tonic-gate 			stick_timestamp(&timestamp[EV_B_START]);
51590Sstevel@tonic-gate 
51600Sstevel@tonic-gate 			/* tell the slave to continue */
51610Sstevel@tonic-gate 			stick_sync_cmd = SLAVE_CONT;
51620Sstevel@tonic-gate 		} /* for */
51630Sstevel@tonic-gate 
51640Sstevel@tonic-gate 		/* wait while slave calculates time skew */
51650Sstevel@tonic-gate 		while (stick_sync_cmd == SLAVE_CONT)
51660Sstevel@tonic-gate 			;
51670Sstevel@tonic-gate 	} /* while */
51680Sstevel@tonic-gate 	kpreempt_enable();
51690Sstevel@tonic-gate }
51700Sstevel@tonic-gate 
51710Sstevel@tonic-gate /*
51720Sstevel@tonic-gate  * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
51730Sstevel@tonic-gate  * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
51740Sstevel@tonic-gate  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
51750Sstevel@tonic-gate  * panic idle.
51760Sstevel@tonic-gate  */
51770Sstevel@tonic-gate /*ARGSUSED*/
51780Sstevel@tonic-gate void
51790Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt)
51800Sstevel@tonic-gate {}
51810Sstevel@tonic-gate 
51820Sstevel@tonic-gate struct kmem_cache *ch_private_cache;
51830Sstevel@tonic-gate 
51840Sstevel@tonic-gate /*
51850Sstevel@tonic-gate  * CPU private uninitialization.  Uninitialize the Ecache scrubber and
51860Sstevel@tonic-gate  * deallocate the scrubber data structures and the cpu_private data structure.
51870Sstevel@tonic-gate  */
51880Sstevel@tonic-gate void
51890Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp)
51900Sstevel@tonic-gate {
51910Sstevel@tonic-gate 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
51920Sstevel@tonic-gate 
51930Sstevel@tonic-gate 	ASSERT(chprp);
51940Sstevel@tonic-gate 	cpu_uninit_ecache_scrub_dr(cp);
51950Sstevel@tonic-gate 	CPU_PRIVATE(cp) = NULL;
51960Sstevel@tonic-gate 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
51970Sstevel@tonic-gate 	kmem_cache_free(ch_private_cache, chprp);
51980Sstevel@tonic-gate 	cmp_delete_cpu(cp->cpu_id);
51990Sstevel@tonic-gate 
52010Sstevel@tonic-gate 
52020Sstevel@tonic-gate /*
52030Sstevel@tonic-gate  * Cheetah Cache Scrubbing
52040Sstevel@tonic-gate  *
52050Sstevel@tonic-gate  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
52060Sstevel@tonic-gate  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
52070Sstevel@tonic-gate  * protected by either parity or ECC.
52080Sstevel@tonic-gate  *
52090Sstevel@tonic-gate  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
52100Sstevel@tonic-gate  * cache per second). Due to the specifics of how the I$ control
52110Sstevel@tonic-gate  * logic works with respect to the ASI used to scrub I$ lines, the entire
52120Sstevel@tonic-gate  * I$ is scanned at once.
52130Sstevel@tonic-gate  */
52140Sstevel@tonic-gate 
52150Sstevel@tonic-gate /*
52160Sstevel@tonic-gate  * Tuneables to enable and disable the scrubbing of the caches, and to tune
52170Sstevel@tonic-gate  * scrubbing behavior.  These may be changed via /etc/system or using mdb
52180Sstevel@tonic-gate  * on a running system.
52190Sstevel@tonic-gate  */
52200Sstevel@tonic-gate int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
52210Sstevel@tonic-gate 
52220Sstevel@tonic-gate /*
52230Sstevel@tonic-gate  * The following are the PIL levels that the softints/cross traps will fire at.
52240Sstevel@tonic-gate  */
52250Sstevel@tonic-gate uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
52260Sstevel@tonic-gate uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
52270Sstevel@tonic-gate uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
52280Sstevel@tonic-gate 
52290Sstevel@tonic-gate #if defined(JALAPENO)
52300Sstevel@tonic-gate 
52310Sstevel@tonic-gate /*
52320Sstevel@tonic-gate  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
52330Sstevel@tonic-gate  * on Jalapeno.
52340Sstevel@tonic-gate  */
52350Sstevel@tonic-gate int ecache_scrub_enable = 0;
52360Sstevel@tonic-gate 
52370Sstevel@tonic-gate #else	/* JALAPENO */
52380Sstevel@tonic-gate 
52390Sstevel@tonic-gate /*
52400Sstevel@tonic-gate  * With all other cpu types, E$ scrubbing is on by default
52410Sstevel@tonic-gate  */
52420Sstevel@tonic-gate int ecache_scrub_enable = 1;
52430Sstevel@tonic-gate 
52440Sstevel@tonic-gate #endif	/* JALAPENO */
52450Sstevel@tonic-gate 
52460Sstevel@tonic-gate 
52470Sstevel@tonic-gate #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
52480Sstevel@tonic-gate 
52490Sstevel@tonic-gate /*
52500Sstevel@tonic-gate  * The I$ scrubber tends to cause latency problems for real-time SW, so it
52510Sstevel@tonic-gate  * is disabled by default on non-Cheetah systems
52520Sstevel@tonic-gate  */
52530Sstevel@tonic-gate int icache_scrub_enable = 0;
52540Sstevel@tonic-gate 
52550Sstevel@tonic-gate /*
52560Sstevel@tonic-gate  * Tuneables specifying the scrub calls per second and the scan rate
52570Sstevel@tonic-gate  * for each cache
52580Sstevel@tonic-gate  *
52590Sstevel@tonic-gate  * The cyclic times are set during boot based on the following values.
52600Sstevel@tonic-gate  * Changing these values in mdb after this time will have no effect.  If
52610Sstevel@tonic-gate  * a different value is desired, it must be set in /etc/system before a
52620Sstevel@tonic-gate  * reboot.
52630Sstevel@tonic-gate  */
52640Sstevel@tonic-gate int ecache_calls_a_sec = 1;
52650Sstevel@tonic-gate int dcache_calls_a_sec = 2;
52660Sstevel@tonic-gate int icache_calls_a_sec = 2;
52670Sstevel@tonic-gate 
52680Sstevel@tonic-gate int ecache_scan_rate_idle = 1;
52690Sstevel@tonic-gate int ecache_scan_rate_busy = 1;
52700Sstevel@tonic-gate int dcache_scan_rate_idle = 1;
52710Sstevel@tonic-gate int dcache_scan_rate_busy = 1;
52720Sstevel@tonic-gate int icache_scan_rate_idle = 1;
52730Sstevel@tonic-gate int icache_scan_rate_busy = 1;
52740Sstevel@tonic-gate 
52750Sstevel@tonic-gate #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
52760Sstevel@tonic-gate 
52770Sstevel@tonic-gate int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
52780Sstevel@tonic-gate 
52790Sstevel@tonic-gate int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
52800Sstevel@tonic-gate int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
52810Sstevel@tonic-gate int icache_calls_a_sec = 100;		/* I$ scrub calls per second */
52820Sstevel@tonic-gate 
52830Sstevel@tonic-gate int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
52840Sstevel@tonic-gate int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
52850Sstevel@tonic-gate int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
52860Sstevel@tonic-gate int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
52870Sstevel@tonic-gate int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
52880Sstevel@tonic-gate int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
52890Sstevel@tonic-gate 
52900Sstevel@tonic-gate #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
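
/*
 * How the tuneables above combine (illustrative arithmetic, assuming
 * the scan rate is amortized evenly across the per-second calls): a
 * scan rate of 100 is 100 tenths of a percent, i.e. 10% of the cache
 * per second, so with 100 calls a second each call covers roughly
 * 0.1% of the cache's lines and a full pass takes about ten seconds.
 */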
52910Sstevel@tonic-gate 
52920Sstevel@tonic-gate /*
52930Sstevel@tonic-gate  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
52940Sstevel@tonic-gate  * increment the outstanding request counter and schedule a softint to run
52950Sstevel@tonic-gate  * the scrubber.
52960Sstevel@tonic-gate  */
52970Sstevel@tonic-gate extern xcfunc_t cache_scrubreq_tl1;
52980Sstevel@tonic-gate 
52990Sstevel@tonic-gate /*
53000Sstevel@tonic-gate  * These are the softint functions for each cache scrubber
53010Sstevel@tonic-gate  */
53020Sstevel@tonic-gate static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
53030Sstevel@tonic-gate static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
53040Sstevel@tonic-gate static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
53050Sstevel@tonic-gate 
53060Sstevel@tonic-gate /*
53070Sstevel@tonic-gate  * The cache scrub info table contains cache specific information
53080Sstevel@tonic-gate  * and allows for some of the scrub code to be table driven, reducing
53090Sstevel@tonic-gate  * duplication of cache similar code.
53100Sstevel@tonic-gate  *
53110Sstevel@tonic-gate  * This table keeps a copy of the value in the calls per second variable
53120Sstevel@tonic-gate  * (?cache_calls_a_sec).  This makes it much more difficult for someone
53130Sstevel@tonic-gate  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
53140Sstevel@tonic-gate  * mdb in a misguided attempt to disable the scrubber).
53150Sstevel@tonic-gate  */
53160Sstevel@tonic-gate struct scrub_info {
53170Sstevel@tonic-gate 	int		*csi_enable;	/* scrubber enable flag */
53180Sstevel@tonic-gate 	int		csi_freq;	/* scrubber calls per second */
53190Sstevel@tonic-gate 	int		csi_index;	/* index to chsm_outstanding[] */
53200Sstevel@tonic-gate 	uint_t		csi_inum;	/* scrubber interrupt number */
53210Sstevel@tonic-gate 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
53220Sstevel@tonic-gate 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
53230Sstevel@tonic-gate 	char		csi_name[3];	/* cache name for this scrub entry */
53240Sstevel@tonic-gate } cache_scrub_info[] = {
53250Sstevel@tonic-gate { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
53260Sstevel@tonic-gate { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
53270Sstevel@tonic-gate { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
53280Sstevel@tonic-gate };
53290Sstevel@tonic-gate 
53300Sstevel@tonic-gate /*
53310Sstevel@tonic-gate  * If scrubbing is enabled, increment the outstanding request counter.  If it
53320Sstevel@tonic-gate  * is 1 (meaning there were no previous requests outstanding), call
53330Sstevel@tonic-gate  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
53340Sstevel@tonic-gate  * a self trap.
53350Sstevel@tonic-gate  */
53360Sstevel@tonic-gate static void
53370Sstevel@tonic-gate do_scrub(struct scrub_info *csi)
53380Sstevel@tonic-gate {
53390Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
53400Sstevel@tonic-gate 	int index = csi->csi_index;
53410Sstevel@tonic-gate 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
53420Sstevel@tonic-gate 
53430Sstevel@tonic-gate 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
53440Sstevel@tonic-gate 		if (atomic_add_32_nv(outstanding, 1) == 1) {
53450Sstevel@tonic-gate 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
53460Sstevel@tonic-gate 			    csi->csi_inum, 0);
53470Sstevel@tonic-gate 		}
53480Sstevel@tonic-gate 	}
53490Sstevel@tonic-gate }
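
/*
 * The "== 1" test above coalesces requests: only the transition from
 * zero outstanding requests to one posts the softint; later requests
 * just bump the counter.  A sketch of the consuming side (hypothetical
 * shape only; the real softint handlers appear elsewhere in this file):
 *
 *	scrub some lines;			-- one unit of work
 *	if (atomic_add_32_nv(outstanding, -1) != 0)
 *		repost the softint;		-- requests still pending
 */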
53500Sstevel@tonic-gate 
53510Sstevel@tonic-gate /*
53520Sstevel@tonic-gate  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
53530Sstevel@tonic-gate  * cross-trap the offline cpus.
53540Sstevel@tonic-gate  */
53550Sstevel@tonic-gate static void
53560Sstevel@tonic-gate do_scrub_offline(struct scrub_info *csi)
53570Sstevel@tonic-gate {
53580Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
53590Sstevel@tonic-gate 
53600Sstevel@tonic-gate 	if (CPUSET_ISNULL(cpu_offline_set)) {
53610Sstevel@tonic-gate 		/*
53620Sstevel@tonic-gate 		 * No offline cpus - nothing to do
53630Sstevel@tonic-gate 		 */
53640Sstevel@tonic-gate 		return;
53650Sstevel@tonic-gate 	}
53660Sstevel@tonic-gate 
53670Sstevel@tonic-gate 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
53680Sstevel@tonic-gate 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
53690Sstevel@tonic-gate 		    csi->csi_index);
53700Sstevel@tonic-gate 	}
53710Sstevel@tonic-gate }
53720Sstevel@tonic-gate 
53730Sstevel@tonic-gate /*
53740Sstevel@tonic-gate  * This is the initial setup for the scrubber cyclics - it sets the
53750Sstevel@tonic-gate  * interrupt level, frequency, and function to call.
53760Sstevel@tonic-gate  */
53770Sstevel@tonic-gate /*ARGSUSED*/
53780Sstevel@tonic-gate static void
53790Sstevel@tonic-gate cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
53800Sstevel@tonic-gate     cyc_time_t *when)
53810Sstevel@tonic-gate {
53820Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg;
53830Sstevel@tonic-gate 
53840Sstevel@tonic-gate 	ASSERT(csi != NULL);
53850Sstevel@tonic-gate 	hdlr->cyh_func = (cyc_func_t)do_scrub;
53860Sstevel@tonic-gate 	hdlr->cyh_level = CY_LOW_LEVEL;
53870Sstevel@tonic-gate 	hdlr->cyh_arg = arg;
53880Sstevel@tonic-gate 
53890Sstevel@tonic-gate 	when->cyt_when = 0;	/* Start immediately */
53900Sstevel@tonic-gate 	when->cyt_interval = NANOSEC / csi->csi_freq;
53910Sstevel@tonic-gate }
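
/*
 * Worked example for the interval above: with csi_freq = 100 calls a
 * second, cyt_interval = NANOSEC / 100 = 10,000,000 ns, so the cyclic
 * fires every 10 ms; with csi_freq = 1 it fires once a second.
 */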
53920Sstevel@tonic-gate 
53930Sstevel@tonic-gate /*
53940Sstevel@tonic-gate  * Initialization for cache scrubbing.
53950Sstevel@tonic-gate  * This routine is called AFTER all cpus have had cpu_init_private called
53960Sstevel@tonic-gate  * to initialize their private data areas.
53970Sstevel@tonic-gate  */
53980Sstevel@tonic-gate void
53990Sstevel@tonic-gate cpu_init_cache_scrub(void)
54000Sstevel@tonic-gate {
54010Sstevel@tonic-gate 	int i;
54020Sstevel@tonic-gate 	struct scrub_info *csi;
54030Sstevel@tonic-gate 	cyc_omni_handler_t omni_hdlr;
54040Sstevel@tonic-gate 	cyc_handler_t offline_hdlr;
54050Sstevel@tonic-gate 	cyc_time_t when;
54060Sstevel@tonic-gate 
54070Sstevel@tonic-gate 	/*
54080Sstevel@tonic-gate 	 * save away the number of lines in the D$
54090Sstevel@tonic-gate 	 */
54100Sstevel@tonic-gate 	dcache_nlines = dcache_size / dcache_linesize;
54110Sstevel@tonic-gate 
54120Sstevel@tonic-gate 	/*
54130Sstevel@tonic-gate 	 * register the softints for the cache scrubbing
54140Sstevel@tonic-gate 	 */
54150Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
54160Sstevel@tonic-gate 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
54170Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
54180Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
54190Sstevel@tonic-gate 
54200Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
54210Sstevel@tonic-gate 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
54220Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
54230Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
54240Sstevel@tonic-gate 
54250Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
54260Sstevel@tonic-gate 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
54270Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
54280Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
54290Sstevel@tonic-gate 
54300Sstevel@tonic-gate 	/*
54310Sstevel@tonic-gate 	 * start the scrubbing for all the caches
54320Sstevel@tonic-gate 	 */
54330Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
54340Sstevel@tonic-gate 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
54350Sstevel@tonic-gate 
54360Sstevel@tonic-gate 		csi = &cache_scrub_info[i];
54370Sstevel@tonic-gate 
54380Sstevel@tonic-gate 		if (!(*csi->csi_enable))
54390Sstevel@tonic-gate 			continue;
54400Sstevel@tonic-gate 
54410Sstevel@tonic-gate 		/*
54420Sstevel@tonic-gate 		 * force the following to be true:
54430Sstevel@tonic-gate 		 *	1 <= calls_a_sec <= hz
54440Sstevel@tonic-gate 		 */
54450Sstevel@tonic-gate 		if (csi->csi_freq > hz) {
			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
			    "(%d); resetting to hz (%d)", csi->csi_name,
			    csi->csi_freq, hz);
54490Sstevel@tonic-gate 			csi->csi_freq = hz;
54500Sstevel@tonic-gate 		} else if (csi->csi_freq < 1) {
			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
			    "(%d); resetting to 1", csi->csi_name,
			    csi->csi_freq);
54540Sstevel@tonic-gate 			csi->csi_freq = 1;
54550Sstevel@tonic-gate 		}
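		/*
		 * Annotation (not in the original source): e.g., with an
		 * assumed hz of 100, a tunable setting of calls_a_sec =
		 * 1000 is clamped to 100 above, and a setting of 0 is
		 * raised to 1.
		 */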
54560Sstevel@tonic-gate 
54570Sstevel@tonic-gate 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
54580Sstevel@tonic-gate 		omni_hdlr.cyo_offline = NULL;
54590Sstevel@tonic-gate 		omni_hdlr.cyo_arg = (void *)csi;
54600Sstevel@tonic-gate 
54610Sstevel@tonic-gate 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
54620Sstevel@tonic-gate 		offline_hdlr.cyh_arg = (void *)csi;
54630Sstevel@tonic-gate 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
54640Sstevel@tonic-gate 
54650Sstevel@tonic-gate 		when.cyt_when = 0;	/* Start immediately */
54660Sstevel@tonic-gate 		when.cyt_interval = NANOSEC / csi->csi_freq;
54670Sstevel@tonic-gate 
54680Sstevel@tonic-gate 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
54690Sstevel@tonic-gate 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
54700Sstevel@tonic-gate 	}
54710Sstevel@tonic-gate 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
54720Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
54730Sstevel@tonic-gate }
54740Sstevel@tonic-gate 
54750Sstevel@tonic-gate /*
54760Sstevel@tonic-gate  * Indicate that the specified cpu is idle.
54770Sstevel@tonic-gate  */
54780Sstevel@tonic-gate void
54790Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
54800Sstevel@tonic-gate {
54810Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
54820Sstevel@tonic-gate 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
54830Sstevel@tonic-gate 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
54840Sstevel@tonic-gate 	}
54850Sstevel@tonic-gate }
54860Sstevel@tonic-gate 
54870Sstevel@tonic-gate /*
54880Sstevel@tonic-gate  * Indicate that the specified cpu is busy.
54890Sstevel@tonic-gate  */
54900Sstevel@tonic-gate void
54910Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
54920Sstevel@tonic-gate {
54930Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
54940Sstevel@tonic-gate 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
54950Sstevel@tonic-gate 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
54960Sstevel@tonic-gate 	}
54970Sstevel@tonic-gate }
54980Sstevel@tonic-gate 
54990Sstevel@tonic-gate /*
55000Sstevel@tonic-gate  * Initialization for cache scrubbing for the specified cpu.
55010Sstevel@tonic-gate  */
55020Sstevel@tonic-gate void
55030Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
55040Sstevel@tonic-gate {
55050Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
55060Sstevel@tonic-gate 	int cpuid = cp->cpu_id;
55070Sstevel@tonic-gate 
55080Sstevel@tonic-gate 	/* initialize the number of lines in the caches */
55090Sstevel@tonic-gate 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
55100Sstevel@tonic-gate 	    cpunodes[cpuid].ecache_linesize;
55110Sstevel@tonic-gate 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
55120Sstevel@tonic-gate 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
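	/*
	 * Annotation (not in the original source), with assumed example
	 * geometry: an 8MB E$ with 512-byte lines gives
	 * chsm_ecache_nlines = 16384; a 32KB I$ with 32-byte lines gives
	 * chsm_icache_nlines = 1024.
	 */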
55130Sstevel@tonic-gate 
55140Sstevel@tonic-gate 	/*
55150Sstevel@tonic-gate 	 * do_scrub() and do_scrub_offline() check both the global
55160Sstevel@tonic-gate 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
55170Sstevel@tonic-gate 	 * check this value before scrubbing.  Currently, we use it to
55180Sstevel@tonic-gate 	 * disable the E$ scrubber on multi-core cpus or while running at
55190Sstevel@tonic-gate 	 * slowed speed.  For now, just turn everything on and allow
55200Sstevel@tonic-gate 	 * cpu_init_private() to change it if necessary.
55210Sstevel@tonic-gate 	 */
55220Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
55230Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
55240Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
55250Sstevel@tonic-gate 
55260Sstevel@tonic-gate 	cpu_busy_ecache_scrub(cp);
55270Sstevel@tonic-gate }
55280Sstevel@tonic-gate 
55290Sstevel@tonic-gate /*
55300Sstevel@tonic-gate  * Un-initialization for cache scrubbing for the specified cpu.
55310Sstevel@tonic-gate  */
55320Sstevel@tonic-gate static void
55330Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
55340Sstevel@tonic-gate {
55350Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
55360Sstevel@tonic-gate 
55370Sstevel@tonic-gate 	/*
55380Sstevel@tonic-gate 	 * un-initialize bookkeeping for cache scrubbing
55390Sstevel@tonic-gate 	 */
55400Sstevel@tonic-gate 	bzero(csmp, sizeof (ch_scrub_misc_t));
55410Sstevel@tonic-gate 
55420Sstevel@tonic-gate 	cpu_idle_ecache_scrub(cp);
55430Sstevel@tonic-gate }
55440Sstevel@tonic-gate 
55450Sstevel@tonic-gate /*
55460Sstevel@tonic-gate  * Called periodically on each CPU to scrub the D$.
55470Sstevel@tonic-gate  */
55480Sstevel@tonic-gate static void
55490Sstevel@tonic-gate scrub_dcache(int how_many)
55500Sstevel@tonic-gate {
55510Sstevel@tonic-gate 	int i;
55520Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
55530Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
55540Sstevel@tonic-gate 
55550Sstevel@tonic-gate 	/*
55560Sstevel@tonic-gate 	 * scrub the desired number of lines
55570Sstevel@tonic-gate 	 */
55580Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
55590Sstevel@tonic-gate 		/*
55600Sstevel@tonic-gate 		 * scrub a D$ line
55610Sstevel@tonic-gate 		 */
55620Sstevel@tonic-gate 		dcache_inval_line(index);
55630Sstevel@tonic-gate 
55640Sstevel@tonic-gate 		/*
55650Sstevel@tonic-gate 		 * calculate the next D$ line to scrub, assumes
55660Sstevel@tonic-gate 		 * that dcache_nlines is a power of 2
55670Sstevel@tonic-gate 		 */
55680Sstevel@tonic-gate 		index = (index + 1) & (dcache_nlines - 1);
55690Sstevel@tonic-gate 	}
55700Sstevel@tonic-gate 
55710Sstevel@tonic-gate 	/*
55720Sstevel@tonic-gate 	 * set the scrub index for the next visit
55730Sstevel@tonic-gate 	 */
55740Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
55750Sstevel@tonic-gate }
55760Sstevel@tonic-gate 
55770Sstevel@tonic-gate /*
55780Sstevel@tonic-gate  * Handler for D$ scrub inum softint. Call scrub_dcache until
55790Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
55800Sstevel@tonic-gate  */
55810Sstevel@tonic-gate /*ARGSUSED*/
55820Sstevel@tonic-gate static uint_t
55830Sstevel@tonic-gate scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
55840Sstevel@tonic-gate {
55850Sstevel@tonic-gate 	int i;
55860Sstevel@tonic-gate 	int how_many;
55870Sstevel@tonic-gate 	int outstanding;
55880Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
55890Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
55900Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
55910Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
	    dcache_scan_rate_idle : dcache_scan_rate_busy;
55930Sstevel@tonic-gate 
55940Sstevel@tonic-gate 	/*
55950Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
55960Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
55970Sstevel@tonic-gate 	 * cache is scanned every second.
55980Sstevel@tonic-gate 	 */
55990Sstevel@tonic-gate 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
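	/*
	 * Annotation (not in the original source), a worked example with
	 * assumed values: dcache_nlines = 512, scan_rate = 600 (60%) and
	 * csi_freq = 100 give how_many = (512 * 600) / (1000 * 100) = 3
	 * lines per call, i.e. 300 lines/sec -- about 60% of the cache
	 * each second, modulo integer truncation.
	 */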
56000Sstevel@tonic-gate 
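	/*
	 * Annotation (not in the original source): this loop pairs with
	 * the atomic_add_32_nv(outstanding, 1) in do_scrub() -- only the
	 * 0 -> 1 transition raises the softint, and this handler drains
	 * however many requests accumulated while it was running.
	 */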
56010Sstevel@tonic-gate 	do {
56020Sstevel@tonic-gate 		outstanding = *countp;
56030Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
56040Sstevel@tonic-gate 			scrub_dcache(how_many);
56050Sstevel@tonic-gate 		}
56060Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
56070Sstevel@tonic-gate 
56080Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
56090Sstevel@tonic-gate }
56100Sstevel@tonic-gate 
56110Sstevel@tonic-gate /*
56120Sstevel@tonic-gate  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
 * by invalidating lines. Due to the characteristics of the ASI used
 * to invalidate I$ lines, the entire I$ must be invalidated rather
 * than an individual I$ line.
56160Sstevel@tonic-gate  */
56170Sstevel@tonic-gate static void
56180Sstevel@tonic-gate scrub_icache(int how_many)
56190Sstevel@tonic-gate {
56200Sstevel@tonic-gate 	int i;
56210Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
56220Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
56230Sstevel@tonic-gate 	int icache_nlines = csmp->chsm_icache_nlines;
56240Sstevel@tonic-gate 
56250Sstevel@tonic-gate 	/*
56260Sstevel@tonic-gate 	 * scrub the desired number of lines
56270Sstevel@tonic-gate 	 */
56280Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
56290Sstevel@tonic-gate 		/*
56300Sstevel@tonic-gate 		 * since the entire I$ must be scrubbed at once,
56310Sstevel@tonic-gate 		 * wait until the index wraps to zero to invalidate
56320Sstevel@tonic-gate 		 * the entire I$
56330Sstevel@tonic-gate 		 */
56340Sstevel@tonic-gate 		if (index == 0) {
56350Sstevel@tonic-gate 			icache_inval_all();
56360Sstevel@tonic-gate 		}
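		/*
		 * Annotation (not in the original source): the net effect
		 * is one full I$ invalidation per complete pass of the
		 * index, so the configured scan rate still bounds how
		 * often the whole cache is flushed.
		 */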
56370Sstevel@tonic-gate 
56380Sstevel@tonic-gate 		/*
56390Sstevel@tonic-gate 		 * calculate the next I$ line to scrub, assumes
56400Sstevel@tonic-gate 		 * that chsm_icache_nlines is a power of 2
56410Sstevel@tonic-gate 		 */
56420Sstevel@tonic-gate 		index = (index + 1) & (icache_nlines - 1);
56430Sstevel@tonic-gate 	}
56440Sstevel@tonic-gate 
56450Sstevel@tonic-gate 	/*
56460Sstevel@tonic-gate 	 * set the scrub index for the next visit
56470Sstevel@tonic-gate 	 */
56480Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
56490Sstevel@tonic-gate }
56500Sstevel@tonic-gate 
56510Sstevel@tonic-gate /*
56520Sstevel@tonic-gate  * Handler for I$ scrub inum softint. Call scrub_icache until
56530Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
56540Sstevel@tonic-gate  */
56550Sstevel@tonic-gate /*ARGSUSED*/
56560Sstevel@tonic-gate static uint_t
56570Sstevel@tonic-gate scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
56580Sstevel@tonic-gate {
56590Sstevel@tonic-gate 	int i;
56600Sstevel@tonic-gate 	int how_many;
56610Sstevel@tonic-gate 	int outstanding;
56620Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
56630Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
56640Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
56650Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
56660Sstevel@tonic-gate 	    icache_scan_rate_idle : icache_scan_rate_busy;
56670Sstevel@tonic-gate 	int icache_nlines = csmp->chsm_icache_nlines;
56680Sstevel@tonic-gate 
56690Sstevel@tonic-gate 	/*
56700Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
56710Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
56720Sstevel@tonic-gate 	 * cache is scanned every second.
56730Sstevel@tonic-gate 	 */
56740Sstevel@tonic-gate 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
56750Sstevel@tonic-gate 
56760Sstevel@tonic-gate 	do {
56770Sstevel@tonic-gate 		outstanding = *countp;
56780Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
56790Sstevel@tonic-gate 			scrub_icache(how_many);
56800Sstevel@tonic-gate 		}
56810Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
56820Sstevel@tonic-gate 
56830Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
56840Sstevel@tonic-gate }
56850Sstevel@tonic-gate 
56860Sstevel@tonic-gate /*
56870Sstevel@tonic-gate  * Called periodically on each CPU to scrub the E$.
56880Sstevel@tonic-gate  */
56890Sstevel@tonic-gate static void
56900Sstevel@tonic-gate scrub_ecache(int how_many)
56910Sstevel@tonic-gate {
56920Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
56930Sstevel@tonic-gate 	int i;
56940Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
56950Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
56960Sstevel@tonic-gate 	int nlines = csmp->chsm_ecache_nlines;
56970Sstevel@tonic-gate 	int linesize = cpunodes[cpuid].ecache_linesize;
56980Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
56990Sstevel@tonic-gate 
57000Sstevel@tonic-gate 	/*
57010Sstevel@tonic-gate 	 * scrub the desired number of lines
57020Sstevel@tonic-gate 	 */
57030Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
57040Sstevel@tonic-gate 		/*
57050Sstevel@tonic-gate 		 * scrub the E$ line
57060Sstevel@tonic-gate 		 */
57070Sstevel@tonic-gate 		ecache_flush_line(ecache_flushaddr + (index * linesize),
57080Sstevel@tonic-gate 		    ec_set_size);
57090Sstevel@tonic-gate 
57100Sstevel@tonic-gate 		/*
57110Sstevel@tonic-gate 		 * calculate the next E$ line to scrub based on twice
57120Sstevel@tonic-gate 		 * the number of E$ lines (to displace lines containing
57130Sstevel@tonic-gate 		 * flush area data), assumes that the number of lines
57140Sstevel@tonic-gate 		 * is a power of 2
57150Sstevel@tonic-gate 		 */
57160Sstevel@tonic-gate 		index = (index + 1) & ((nlines << 1) - 1);
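		/*
		 * Annotation (not in the original source), with assumed
		 * example geometry: an 8MB E$ with 512-byte lines has
		 * nlines = 16384, so the index wraps modulo 32768 and the
		 * flush addresses walk a 16MB window -- twice the cache
		 * size -- ensuring lines holding flush-area data are
		 * themselves eventually displaced.
		 */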
57170Sstevel@tonic-gate 	}
57180Sstevel@tonic-gate 
57190Sstevel@tonic-gate 	/*
57200Sstevel@tonic-gate 	 * set the ecache scrub index for the next visit
57210Sstevel@tonic-gate 	 */
57220Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
57230Sstevel@tonic-gate }
57240Sstevel@tonic-gate 
57250Sstevel@tonic-gate /*
57260Sstevel@tonic-gate  * Handler for E$ scrub inum softint. Call the E$ scrubber until
57270Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
5728474Srscott  *
5729474Srscott  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5730474Srscott  * become negative after the atomic_add_32_nv().  This is not a problem, as
5731474Srscott  * the next trip around the loop won't scrub anything, and the next add will
5732474Srscott  * reset the count back to zero.
57330Sstevel@tonic-gate  */
57340Sstevel@tonic-gate /*ARGSUSED*/
57350Sstevel@tonic-gate static uint_t
57360Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
57370Sstevel@tonic-gate {
57380Sstevel@tonic-gate 	int i;
57390Sstevel@tonic-gate 	int how_many;
57400Sstevel@tonic-gate 	int outstanding;
57410Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
57420Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
57430Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
57440Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
	    ecache_scan_rate_idle : ecache_scan_rate_busy;
57460Sstevel@tonic-gate 	int ecache_nlines = csmp->chsm_ecache_nlines;
57470Sstevel@tonic-gate 
57480Sstevel@tonic-gate 	/*
57490Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
57500Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
57510Sstevel@tonic-gate 	 * cache is scanned every second.
57520Sstevel@tonic-gate 	 */
57530Sstevel@tonic-gate 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
57540Sstevel@tonic-gate 
57550Sstevel@tonic-gate 	do {
57560Sstevel@tonic-gate 		outstanding = *countp;
57570Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
57580Sstevel@tonic-gate 			scrub_ecache(how_many);
57590Sstevel@tonic-gate 		}
57600Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
57610Sstevel@tonic-gate 
57620Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
57630Sstevel@tonic-gate }
57640Sstevel@tonic-gate 
57650Sstevel@tonic-gate /*
 * Timeout function to re-enable CEEN: it dispatches the actual check to
 * a taskq, and reschedules itself if the TQ_NOSLEEP dispatch fails.
57670Sstevel@tonic-gate  */
57680Sstevel@tonic-gate static void
57690Sstevel@tonic-gate cpu_delayed_check_ce_errors(void *arg)
57700Sstevel@tonic-gate {
57710Sstevel@tonic-gate 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
57720Sstevel@tonic-gate 	    TQ_NOSLEEP)) {
57730Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors, arg,
57740Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
57750Sstevel@tonic-gate 	}
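	/*
	 * Annotation (not in the original source), with an assumed
	 * cpu_ceen_delay_secs of 6 and hz of 100: drv_usectohz(6 *
	 * MICROSEC) = 600 ticks, so a failed TQ_NOSLEEP dispatch simply
	 * retries about 6 seconds later.
	 */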
57760Sstevel@tonic-gate }
57770Sstevel@tonic-gate 
57780Sstevel@tonic-gate /*
57790Sstevel@tonic-gate  * CE Deferred Re-enable after trap.
57800Sstevel@tonic-gate  *
57810Sstevel@tonic-gate  * When the CPU gets a disrupting trap for any of the errors
57820Sstevel@tonic-gate  * controlled by the CEEN bit, CEEN is disabled in the trap handler
57830Sstevel@tonic-gate  * immediately. To eliminate the possibility of multiple CEs causing
57840Sstevel@tonic-gate  * recursive stack overflow in the trap handler, we cannot
57850Sstevel@tonic-gate  * reenable CEEN while still running in the trap handler. Instead,
57860Sstevel@tonic-gate  * after a CE is logged on a CPU, we schedule a timeout function,
57870Sstevel@tonic-gate  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
57880Sstevel@tonic-gate  * seconds. This function will check whether any further CEs
57890Sstevel@tonic-gate  * have occurred on that CPU, and if none have, will reenable CEEN.
57900Sstevel@tonic-gate  *
57910Sstevel@tonic-gate  * If further CEs have occurred while CEEN is disabled, another
57920Sstevel@tonic-gate  * timeout will be scheduled. This is to ensure that the CPU can
57930Sstevel@tonic-gate  * make progress in the face of CE 'storms', and that it does not
57940Sstevel@tonic-gate  * spend all its time logging CE errors.
57950Sstevel@tonic-gate  */
57960Sstevel@tonic-gate static void
57970Sstevel@tonic-gate cpu_check_ce_errors(void *arg)
57980Sstevel@tonic-gate {
5799946Smathue 	int	cpuid = (int)(uintptr_t)arg;
58000Sstevel@tonic-gate 	cpu_t	*cp;
58010Sstevel@tonic-gate 
58020Sstevel@tonic-gate 	/*
	 * We acquire cpu_lock below, so we must be running at PIL 0.
58040Sstevel@tonic-gate 	 */
58050Sstevel@tonic-gate 	ASSERT(curthread->t_pil == 0);
58060Sstevel@tonic-gate 
58070Sstevel@tonic-gate 	/*
58080Sstevel@tonic-gate 	 * verify that the cpu is still around, DR
58090Sstevel@tonic-gate 	 * could have got there first ...
58100Sstevel@tonic-gate 	 */
58110Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
58120Sstevel@tonic-gate 	cp = cpu_get(cpuid);
58130Sstevel@tonic-gate 	if (cp == NULL) {
58140Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
58150Sstevel@tonic-gate 		return;
58160Sstevel@tonic-gate 	}
58170Sstevel@tonic-gate 	/*
58180Sstevel@tonic-gate 	 * make sure we don't migrate across CPUs
58190Sstevel@tonic-gate 	 * while checking our CE status.
58200Sstevel@tonic-gate 	 */
58210Sstevel@tonic-gate 	kpreempt_disable();
58220Sstevel@tonic-gate 
58230Sstevel@tonic-gate 	/*
58240Sstevel@tonic-gate 	 * If we are running on the CPU that got the
58250Sstevel@tonic-gate 	 * CE, we can do the checks directly.
58260Sstevel@tonic-gate 	 */
58270Sstevel@tonic-gate 	if (cp->cpu_id == CPU->cpu_id) {
58280Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
58290Sstevel@tonic-gate 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
58300Sstevel@tonic-gate 		kpreempt_enable();
58310Sstevel@tonic-gate 		return;
58320Sstevel@tonic-gate 	}
58330Sstevel@tonic-gate 	kpreempt_enable();
58340Sstevel@tonic-gate 
58350Sstevel@tonic-gate 	/*
58360Sstevel@tonic-gate 	 * send an x-call to get the CPU that originally
58370Sstevel@tonic-gate 	 * got the CE to do the necessary checks. If we can't
58380Sstevel@tonic-gate 	 * send the x-call, reschedule the timeout, otherwise we
58390Sstevel@tonic-gate 	 * lose CEEN forever on that CPU.
58400Sstevel@tonic-gate 	 */
58410Sstevel@tonic-gate 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
58420Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
58430Sstevel@tonic-gate 		    TIMEOUT_CEEN_CHECK, 0);
58440Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
58450Sstevel@tonic-gate 	} else {
58460Sstevel@tonic-gate 		/*
58470Sstevel@tonic-gate 		 * When the CPU is not accepting xcalls, or
58480Sstevel@tonic-gate 		 * the processor is offlined, we don't want to
58490Sstevel@tonic-gate 		 * incur the extra overhead of trying to schedule the
58500Sstevel@tonic-gate 		 * CE timeout indefinitely. However, we don't want to lose
58510Sstevel@tonic-gate 		 * CE checking forever.
58520Sstevel@tonic-gate 		 *
58530Sstevel@tonic-gate 		 * Keep rescheduling the timeout, accepting the additional
58540Sstevel@tonic-gate 		 * overhead as the cost of correctness in the case where we get
		 * a CE, disable CEEN, offline the CPU during the
		 * timeout interval, and then online it at some
58570Sstevel@tonic-gate 		 * point in the future. This is unlikely given the short
58580Sstevel@tonic-gate 		 * cpu_ceen_delay_secs.
58590Sstevel@tonic-gate 		 */
58600Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
5861946Smathue 		(void) timeout(cpu_delayed_check_ce_errors,
5862946Smathue 		    (void *)(uintptr_t)cp->cpu_id,
58630Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
58640Sstevel@tonic-gate 	}
58650Sstevel@tonic-gate }
58660Sstevel@tonic-gate 
58670Sstevel@tonic-gate /*
58680Sstevel@tonic-gate  * This routine will check whether CEs have occurred while
58690Sstevel@tonic-gate  * CEEN is disabled. Any CEs detected will be logged and, if
58700Sstevel@tonic-gate  * possible, scrubbed.
58710Sstevel@tonic-gate  *
58720Sstevel@tonic-gate  * The memscrubber will also use this routine to clear any errors
58730Sstevel@tonic-gate  * caused by its scrubbing with CEEN disabled.
58740Sstevel@tonic-gate  *
58750Sstevel@tonic-gate  * flag == SCRUBBER_CEEN_CHECK
58760Sstevel@tonic-gate  *		called from memscrubber, just check/scrub, no reset
 *		pa	physical addr. for start of scrub pages
 *		va	virtual addr. for scrub area
 *		psz	page size of area to be scrubbed
58800Sstevel@tonic-gate  *
58810Sstevel@tonic-gate  * flag == TIMEOUT_CEEN_CHECK
58820Sstevel@tonic-gate  *		timeout function has triggered, reset timeout or CEEN
58830Sstevel@tonic-gate  *
58840Sstevel@tonic-gate  * Note: We must not migrate cpus during this function.  This can be
58850Sstevel@tonic-gate  * achieved by one of:
58860Sstevel@tonic-gate  *    - invoking as target of an x-call in which case we're at XCALL_PIL
 *	The flag value must be the first xcall argument.
58880Sstevel@tonic-gate  *    - disabling kernel preemption.  This should be done for very short
58890Sstevel@tonic-gate  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
58900Sstevel@tonic-gate  *	scrub an extended area with cpu_check_block.  The call for
58910Sstevel@tonic-gate  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
58920Sstevel@tonic-gate  *	brief for this case.
58930Sstevel@tonic-gate  *    - binding to a cpu, eg with thread_affinity_set().  This is used
58940Sstevel@tonic-gate  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
58950Sstevel@tonic-gate  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
58960Sstevel@tonic-gate  */
58970Sstevel@tonic-gate void
58980Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
58990Sstevel@tonic-gate {
59000Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
59010Sstevel@tonic-gate 	uint64_t	ec_err_enable;
59020Sstevel@tonic-gate 	uint64_t	page_offset;
59030Sstevel@tonic-gate 
59040Sstevel@tonic-gate 	/* Read AFSR */
59050Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
59060Sstevel@tonic-gate 
59070Sstevel@tonic-gate 	/*
59080Sstevel@tonic-gate 	 * If no CEEN errors have occurred during the timeout
59090Sstevel@tonic-gate 	 * interval, it is safe to re-enable CEEN and exit.
59100Sstevel@tonic-gate 	 */
59110Sstevel@tonic-gate 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
59120Sstevel@tonic-gate 		if (flag == TIMEOUT_CEEN_CHECK &&
59130Sstevel@tonic-gate 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
59140Sstevel@tonic-gate 			set_error_enable(ec_err_enable | EN_REG_CEEN);
59150Sstevel@tonic-gate 		return;
59160Sstevel@tonic-gate 	}
59170Sstevel@tonic-gate 
59180Sstevel@tonic-gate 	/*
59190Sstevel@tonic-gate 	 * Ensure that CEEN was not reenabled (maybe by DR) before
59200Sstevel@tonic-gate 	 * we log/clear the error.
59210Sstevel@tonic-gate 	 */
59220Sstevel@tonic-gate 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
59240Sstevel@tonic-gate 
59250Sstevel@tonic-gate 	/*
59260Sstevel@tonic-gate 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
59270Sstevel@tonic-gate 	 * timeout will be rescheduled when the error is logged.
59280Sstevel@tonic-gate 	 */
59290Sstevel@tonic-gate 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
		cpu_ce_detected(&cpu_error_regs,
		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
	else
		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
59340Sstevel@tonic-gate 
59350Sstevel@tonic-gate 	/*
59360Sstevel@tonic-gate 	 * If the memory scrubber runs while CEEN is
59370Sstevel@tonic-gate 	 * disabled, (or if CEEN is disabled during the
59380Sstevel@tonic-gate 	 * scrub as a result of a CE being triggered by
59390Sstevel@tonic-gate 	 * it), the range being scrubbed will not be
59400Sstevel@tonic-gate 	 * completely cleaned. If there are multiple CEs
59410Sstevel@tonic-gate 	 * in the range at most two of these will be dealt
59420Sstevel@tonic-gate 	 * with, (one by the trap handler and one by the
59430Sstevel@tonic-gate 	 * timeout). It is also possible that none are dealt
59440Sstevel@tonic-gate 	 * with, (CEEN disabled and another CE occurs before
59450Sstevel@tonic-gate 	 * the timeout triggers). So to ensure that the
59460Sstevel@tonic-gate 	 * memory is actually scrubbed, we have to access each
59470Sstevel@tonic-gate 	 * memory location in the range and then check whether
59480Sstevel@tonic-gate 	 * that access causes a CE.
59490Sstevel@tonic-gate 	 */
59500Sstevel@tonic-gate 	if (flag == SCRUBBER_CEEN_CHECK && va) {
59510Sstevel@tonic-gate 		if ((cpu_error_regs.afar >= pa) &&
59520Sstevel@tonic-gate 		    (cpu_error_regs.afar < (pa + psz))) {
59530Sstevel@tonic-gate 			/*
59540Sstevel@tonic-gate 			 * Force a load from physical memory for each
59550Sstevel@tonic-gate 			 * 64-byte block, then check AFSR to determine
59560Sstevel@tonic-gate 			 * whether this access caused an error.
59570Sstevel@tonic-gate 			 *
59580Sstevel@tonic-gate 			 * This is a slow way to do a scrub, but as it will
59590Sstevel@tonic-gate 			 * only be invoked when the memory scrubber actually
59600Sstevel@tonic-gate 			 * triggered a CE, it should not happen too
59610Sstevel@tonic-gate 			 * frequently.
59620Sstevel@tonic-gate 			 *
			 * Cut down what we need to check, as the scrubber
			 * has verified up to AFAR; get its offset into
			 * the page and start there.
59660Sstevel@tonic-gate 			 */
59670Sstevel@tonic-gate 			page_offset = (uint64_t)(cpu_error_regs.afar &
59680Sstevel@tonic-gate 			    (psz - 1));
59690Sstevel@tonic-gate 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
59700Sstevel@tonic-gate 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
59710Sstevel@tonic-gate 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
59720Sstevel@tonic-gate 			    psz);
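			/*
			 * Annotation (not in the original source), with
			 * assumed values: for an 8K page (psz = 0x2000)
			 * and an AFAR 0xe50 bytes into it, page_offset =
			 * 0xe50, the 64-byte-aligned start is 0xe40, and
			 * the remaining 0x2000 - 0xe40 = 0x11c0 bytes
			 * are re-checked.
			 */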
59730Sstevel@tonic-gate 		}
59740Sstevel@tonic-gate 	}
59750Sstevel@tonic-gate 
59760Sstevel@tonic-gate 	/*
59770Sstevel@tonic-gate 	 * Reset error enable if this CE is not masked.
59780Sstevel@tonic-gate 	 */
59790Sstevel@tonic-gate 	if ((flag == TIMEOUT_CEEN_CHECK) &&
59800Sstevel@tonic-gate 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
		set_error_enable(ec_err_enable | EN_REG_CEEN);
}
59840Sstevel@tonic-gate 
59850Sstevel@tonic-gate /*
59860Sstevel@tonic-gate  * Attempt a cpu logout for an error that we did not trap for, such
59870Sstevel@tonic-gate  * as a CE noticed with CEEN off.  It is assumed that we are still running
59880Sstevel@tonic-gate  * on the cpu that took the error and that we cannot migrate.  Returns
 * on the cpu that took the error and that we cannot migrate.  Returns
 * 1 on success, otherwise 0.
59910Sstevel@tonic-gate static int
59920Sstevel@tonic-gate cpu_ce_delayed_ec_logout(uint64_t afar)
59930Sstevel@tonic-gate {
59940Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
59950Sstevel@tonic-gate 
59960Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL)
59970Sstevel@tonic-gate 		return (0);
59980Sstevel@tonic-gate 
59990Sstevel@tonic-gate 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
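	/*
	 * Annotation (not in the original source): the cas64 below
	 * atomically claims the per-cpu logout area, installing afar only
	 * if the area still holds LOGOUT_INVALID; losing the race means
	 * the area is already in use, so we fail rather than overwrite it.
	 */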
60000Sstevel@tonic-gate 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
60010Sstevel@tonic-gate 	    LOGOUT_INVALID)
60020Sstevel@tonic-gate 		return (0);
60030Sstevel@tonic-gate 
60040Sstevel@tonic-gate 	cpu_delayed_logout(afar, clop);
60050Sstevel@tonic-gate 	return (1);
60060Sstevel@tonic-gate }
60070Sstevel@tonic-gate 
60080Sstevel@tonic-gate /*
60090Sstevel@tonic-gate  * We got an error while CEEN was disabled. We
60100Sstevel@tonic-gate  * need to clean up after it and log whatever
60110Sstevel@tonic-gate  * information we have on the CE.
60120Sstevel@tonic-gate  */
60130Sstevel@tonic-gate void
60140Sstevel@tonic-gate cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
60150Sstevel@tonic-gate {
60160Sstevel@tonic-gate 	ch_async_flt_t 	ch_flt;
60170Sstevel@tonic-gate 	struct async_flt *aflt;
60180Sstevel@tonic-gate 	char 		pr_reason[MAX_REASON_STRING];
60190Sstevel@tonic-gate 
60200Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
60210Sstevel@tonic-gate 	ch_flt.flt_trapped_ce = flag;
60220Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
60230Sstevel@tonic-gate 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
60240Sstevel@tonic-gate 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
60250Sstevel@tonic-gate 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
60260Sstevel@tonic-gate 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
60270Sstevel@tonic-gate 	aflt->flt_addr = cpu_error_regs->afar;
60280Sstevel@tonic-gate #if defined(SERRANO)
60290Sstevel@tonic-gate 	ch_flt.afar2 = cpu_error_regs->afar2;
60300Sstevel@tonic-gate #endif	/* SERRANO */
60310Sstevel@tonic-gate 	aflt->flt_pc = NULL;
60320Sstevel@tonic-gate 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
60330Sstevel@tonic-gate 	aflt->flt_tl = 0;
60340Sstevel@tonic-gate 	aflt->flt_panic = 0;
60350Sstevel@tonic-gate 	cpu_log_and_clear_ce(&ch_flt);
60360Sstevel@tonic-gate 
60370Sstevel@tonic-gate 	/*
60380Sstevel@tonic-gate 	 * check if we caused any errors during cleanup
60390Sstevel@tonic-gate 	 */
60400Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
60410Sstevel@tonic-gate 		pr_reason[0] = '\0';
60420Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
60430Sstevel@tonic-gate 		    NULL);
60440Sstevel@tonic-gate 	}
60450Sstevel@tonic-gate }
60460Sstevel@tonic-gate 
60470Sstevel@tonic-gate /*
60480Sstevel@tonic-gate  * Log/clear CEEN-controlled disrupting errors
60490Sstevel@tonic-gate  */
60500Sstevel@tonic-gate static void
60510Sstevel@tonic-gate cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
60520Sstevel@tonic-gate {
60530Sstevel@tonic-gate 	struct async_flt *aflt;
60540Sstevel@tonic-gate 	uint64_t afsr, afsr_errs;
60550Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
60560Sstevel@tonic-gate 	char 		pr_reason[MAX_REASON_STRING];
60570Sstevel@tonic-gate 	on_trap_data_t	*otp = curthread->t_ontrap;
60580Sstevel@tonic-gate 
60590Sstevel@tonic-gate 	aflt = (struct async_flt *)ch_flt;
60600Sstevel@tonic-gate 	afsr = aflt->flt_stat;
60610Sstevel@tonic-gate 	afsr_errs = ch_flt->afsr_errs;
60620Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
60630Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
60640Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
60650Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
60660Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
60670Sstevel@tonic-gate 	aflt->flt_status = ECC_C_TRAP;
60680Sstevel@tonic-gate 
60690Sstevel@tonic-gate 	pr_reason[0] = '\0';
60700Sstevel@tonic-gate 	/*
	 * Get the CPU logout info for the disrupting trap.
60720Sstevel@tonic-gate 	 */
60730Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
60740Sstevel@tonic-gate 		clop = NULL;
60750Sstevel@tonic-gate 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
60760Sstevel@tonic-gate 	} else {
60770Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
60780Sstevel@tonic-gate 	}
60790Sstevel@tonic-gate 
	if (clop && (ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT)) {
60810Sstevel@tonic-gate 		ch_cpu_errors_t cpu_error_regs;
60820Sstevel@tonic-gate 
60830Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
60840Sstevel@tonic-gate 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
60850Sstevel@tonic-gate 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
60860Sstevel@tonic-gate 		clop->clo_data.chd_afar = cpu_error_regs.afar;
60870Sstevel@tonic-gate 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
60880Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
60890Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
60900Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afsr_ext =
60910Sstevel@tonic-gate 		    cpu_error_regs.shadow_afsr_ext;
60920Sstevel@tonic-gate #if defined(SERRANO)
60930Sstevel@tonic-gate 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
60940Sstevel@tonic-gate #endif	/* SERRANO */
60950Sstevel@tonic-gate 		ch_flt->flt_data_incomplete = 1;
60960Sstevel@tonic-gate 
60970Sstevel@tonic-gate 		/*
60980Sstevel@tonic-gate 		 * The logging/clear code expects AFSR/AFAR to be cleared.
60990Sstevel@tonic-gate 		 * The trap handler does it for CEEN enabled errors
61000Sstevel@tonic-gate 		 * so we need to do it here.
61010Sstevel@tonic-gate 		 */
61020Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
61030Sstevel@tonic-gate 	}
61040Sstevel@tonic-gate 
61050Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
61060Sstevel@tonic-gate 	/*
61070Sstevel@tonic-gate 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
61090Sstevel@tonic-gate 	 * scrub on the RCE side since that's where the error type can
61100Sstevel@tonic-gate 	 * be properly classified as intermittent, persistent, etc.
61110Sstevel@tonic-gate 	 *
61120Sstevel@tonic-gate 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
61130Sstevel@tonic-gate 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
61140Sstevel@tonic-gate 	 * the flt_status bits.
61150Sstevel@tonic-gate 	 */
61160Sstevel@tonic-gate 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
61170Sstevel@tonic-gate 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
61180Sstevel@tonic-gate 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
61190Sstevel@tonic-gate 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
61200Sstevel@tonic-gate 	}
61210Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
61220Sstevel@tonic-gate 	/*
61230Sstevel@tonic-gate 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
61240Sstevel@tonic-gate 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
61250Sstevel@tonic-gate 	 * the flt_status bits.
61260Sstevel@tonic-gate 	 */
61270Sstevel@tonic-gate 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
61280Sstevel@tonic-gate 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
61290Sstevel@tonic-gate 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
61300Sstevel@tonic-gate 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
61310Sstevel@tonic-gate 		}
61320Sstevel@tonic-gate 	}
61330Sstevel@tonic-gate 
61340Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
61350Sstevel@tonic-gate 
61360Sstevel@tonic-gate 	/*
61370Sstevel@tonic-gate 	 * Update flt_prot if this error occurred under on_trap protection.
61380Sstevel@tonic-gate 	 */
61390Sstevel@tonic-gate 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
61400Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_EC;
61410Sstevel@tonic-gate 
61420Sstevel@tonic-gate 	/*
61430Sstevel@tonic-gate 	 * Queue events on the async event queue, one event per error bit.
61440Sstevel@tonic-gate 	 */
61450Sstevel@tonic-gate 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
61460Sstevel@tonic-gate 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
61470Sstevel@tonic-gate 		ch_flt->flt_type = CPU_INV_AFSR;
61480Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
61490Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
61500Sstevel@tonic-gate 		    aflt->flt_panic);
61510Sstevel@tonic-gate 	}
61520Sstevel@tonic-gate 
61530Sstevel@tonic-gate 	/*
61540Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
61550Sstevel@tonic-gate 	 */
61560Sstevel@tonic-gate 	if (clop) {
61570Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
61580Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
61590Sstevel@tonic-gate 	}
61600Sstevel@tonic-gate 
61610Sstevel@tonic-gate 	/*
61620Sstevel@tonic-gate 	 * If either a CPC, WDC or EDC error has occurred while CEEN
61630Sstevel@tonic-gate 	 * was disabled, we need to flush either the entire
61640Sstevel@tonic-gate 	 * E$ or an E$ line.
61650Sstevel@tonic-gate 	 */
61660Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
61670Sstevel@tonic-gate 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
61680Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
61690Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
61700Sstevel@tonic-gate 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
61710Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
61720Sstevel@tonic-gate 		cpu_error_ecache_flush(ch_flt);
}
61750Sstevel@tonic-gate 
61760Sstevel@tonic-gate /*
61770Sstevel@tonic-gate  * depending on the error type, we determine whether we
61780Sstevel@tonic-gate  * need to flush the entire ecache or just a line.
61790Sstevel@tonic-gate  */
61800Sstevel@tonic-gate static int
61810Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
61820Sstevel@tonic-gate {
61830Sstevel@tonic-gate 	struct async_flt *aflt;
61840Sstevel@tonic-gate 	uint64_t	afsr;
61850Sstevel@tonic-gate 	uint64_t	afsr_errs = ch_flt->afsr_errs;
61860Sstevel@tonic-gate 
61870Sstevel@tonic-gate 	aflt = (struct async_flt *)ch_flt;
61880Sstevel@tonic-gate 	afsr = aflt->flt_stat;
61890Sstevel@tonic-gate 
61900Sstevel@tonic-gate 	/*
	 * If we got multiple errors, there is no point in trying
	 * the individual cases; just flush the whole cache.
61930Sstevel@tonic-gate 	 */
61940Sstevel@tonic-gate 	if (afsr & C_AFSR_ME) {
61950Sstevel@tonic-gate 		return (ECACHE_FLUSH_ALL);
61960Sstevel@tonic-gate 	}
61970Sstevel@tonic-gate 
61980Sstevel@tonic-gate 	/*
61990Sstevel@tonic-gate 	 * If either a CPC, WDC or EDC error has occurred while CEEN
62000Sstevel@tonic-gate 	 * was disabled, we need to flush entire E$. We can't just
62010Sstevel@tonic-gate 	 * flush the cache line affected as the ME bit
62020Sstevel@tonic-gate 	 * is not set when multiple correctable errors of the same
62030Sstevel@tonic-gate 	 * type occur, so we might have multiple CPC or EDC errors,
62040Sstevel@tonic-gate 	 * with only the first recorded.
62050Sstevel@tonic-gate 	 */
62060Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
62070Sstevel@tonic-gate 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
62080Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
62090Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
62100Sstevel@tonic-gate 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
62110Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
62120Sstevel@tonic-gate 		return (ECACHE_FLUSH_ALL);
62130Sstevel@tonic-gate 	}
62140Sstevel@tonic-gate 
62150Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
62160Sstevel@tonic-gate 	/*
62170Sstevel@tonic-gate 	 * If only UE or RUE is set, flush the Ecache line, otherwise
62180Sstevel@tonic-gate 	 * flush the entire Ecache.
62190Sstevel@tonic-gate 	 */
62200Sstevel@tonic-gate 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
62210Sstevel@tonic-gate 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
62220Sstevel@tonic-gate 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
62230Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
62240Sstevel@tonic-gate 		} else {
62250Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
62260Sstevel@tonic-gate 		}
62270Sstevel@tonic-gate 	}
62280Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
62290Sstevel@tonic-gate 	/*
62300Sstevel@tonic-gate 	 * If UE only is set, flush the Ecache line, otherwise
62310Sstevel@tonic-gate 	 * flush the entire Ecache.
62320Sstevel@tonic-gate 	 */
62330Sstevel@tonic-gate 	if (afsr_errs & C_AFSR_UE) {
62340Sstevel@tonic-gate 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
62350Sstevel@tonic-gate 		    C_AFSR_UE) {
62360Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
62370Sstevel@tonic-gate 		} else {
62380Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
62390Sstevel@tonic-gate 		}
62400Sstevel@tonic-gate 	}
62410Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
62420Sstevel@tonic-gate 
62430Sstevel@tonic-gate 	/*
62440Sstevel@tonic-gate 	 * EDU: If EDU only is set, flush the ecache line, otherwise
62450Sstevel@tonic-gate 	 * flush the entire Ecache.
62460Sstevel@tonic-gate 	 */
62470Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
62480Sstevel@tonic-gate 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
62490Sstevel@tonic-gate 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
62500Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
62510Sstevel@tonic-gate 		} else {
62520Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
62530Sstevel@tonic-gate 		}
62540Sstevel@tonic-gate 	}
62550Sstevel@tonic-gate 
62560Sstevel@tonic-gate 	/*
62570Sstevel@tonic-gate 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
62580Sstevel@tonic-gate 	 * flush the entire Ecache.
62590Sstevel@tonic-gate 	 */
62600Sstevel@tonic-gate 	if (afsr_errs & C_AFSR_BERR) {
62610Sstevel@tonic-gate 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
62620Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
62630Sstevel@tonic-gate 		} else {
62640Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
62650Sstevel@tonic-gate 		}
62660Sstevel@tonic-gate 	}
62670Sstevel@tonic-gate 
62680Sstevel@tonic-gate 	return (0);
62690Sstevel@tonic-gate }
62700Sstevel@tonic-gate 
62710Sstevel@tonic-gate void
62720Sstevel@tonic-gate cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
62730Sstevel@tonic-gate {
62740Sstevel@tonic-gate 	int	ecache_flush_flag =
62750Sstevel@tonic-gate 	    cpu_error_ecache_flush_required(ch_flt);
62760Sstevel@tonic-gate 
62770Sstevel@tonic-gate 	/*
62780Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache based on above checks.
62790Sstevel@tonic-gate 	 */
	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
		cpu_flush_ecache();
	else if (ecache_flush_flag == ECACHE_FLUSH_LINE)
		cpu_flush_ecache_line(ch_flt);
}
62870Sstevel@tonic-gate 
62880Sstevel@tonic-gate /*
62890Sstevel@tonic-gate  * Extract the PA portion from the E$ tag.
62900Sstevel@tonic-gate  */
62910Sstevel@tonic-gate uint64_t
62920Sstevel@tonic-gate cpu_ectag_to_pa(int setsize, uint64_t tag)
62930Sstevel@tonic-gate {
62940Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
62950Sstevel@tonic-gate 		return (JG_ECTAG_TO_PA(setsize, tag));
62960Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
62970Sstevel@tonic-gate 		return (PN_L3TAG_TO_PA(tag));
62980Sstevel@tonic-gate 	else
62990Sstevel@tonic-gate 		return (CH_ECTAG_TO_PA(setsize, tag));
63000Sstevel@tonic-gate }
63010Sstevel@tonic-gate 
63020Sstevel@tonic-gate /*
63030Sstevel@tonic-gate  * Convert the E$ tag PA into an E$ subblock index.
63040Sstevel@tonic-gate  */
63050Sstevel@tonic-gate static int
63060Sstevel@tonic-gate cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
63070Sstevel@tonic-gate {
63080Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
63090Sstevel@tonic-gate 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
63100Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
63110Sstevel@tonic-gate 		/* Panther has only one subblock per line */
63120Sstevel@tonic-gate 		return (0);
63130Sstevel@tonic-gate 	else
63140Sstevel@tonic-gate 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
63150Sstevel@tonic-gate }
63160Sstevel@tonic-gate 
63170Sstevel@tonic-gate /*
63180Sstevel@tonic-gate  * All subblocks in an E$ line must be invalid for
63190Sstevel@tonic-gate  * the line to be invalid.
63200Sstevel@tonic-gate  */
63210Sstevel@tonic-gate int
63220Sstevel@tonic-gate cpu_ectag_line_invalid(int cachesize, uint64_t tag)
63230Sstevel@tonic-gate {
63240Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
63250Sstevel@tonic-gate 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
63260Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
63270Sstevel@tonic-gate 		return (PN_L3_LINE_INVALID(tag));
63280Sstevel@tonic-gate 	else
63290Sstevel@tonic-gate 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
63300Sstevel@tonic-gate }
63310Sstevel@tonic-gate 
63320Sstevel@tonic-gate /*
63330Sstevel@tonic-gate  * Extract state bits for a subblock given the tag.  Note that for Panther
 * this works on both L2 and L3 tags.
63350Sstevel@tonic-gate  */
63360Sstevel@tonic-gate static int
63370Sstevel@tonic-gate cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
63380Sstevel@tonic-gate {
63390Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
63400Sstevel@tonic-gate 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
63410Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
63420Sstevel@tonic-gate 		return (tag & CH_ECSTATE_MASK);
63430Sstevel@tonic-gate 	else
63440Sstevel@tonic-gate 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
63450Sstevel@tonic-gate }
63460Sstevel@tonic-gate 
63470Sstevel@tonic-gate /*
 * CPU-specific initialization.
63490Sstevel@tonic-gate  */
63500Sstevel@tonic-gate void
63510Sstevel@tonic-gate cpu_mp_init(void)
63520Sstevel@tonic-gate {
63530Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
63540Sstevel@tonic-gate 	if (cheetah_sendmondo_recover) {
63550Sstevel@tonic-gate 		cheetah_nudge_init();
63560Sstevel@tonic-gate 	}
63570Sstevel@tonic-gate #endif
63580Sstevel@tonic-gate }
63590Sstevel@tonic-gate 
63600Sstevel@tonic-gate void
63610Sstevel@tonic-gate cpu_ereport_post(struct async_flt *aflt)
63620Sstevel@tonic-gate {
63630Sstevel@tonic-gate 	char *cpu_type, buf[FM_MAX_CLASS];
63640Sstevel@tonic-gate 	nv_alloc_t *nva = NULL;
63650Sstevel@tonic-gate 	nvlist_t *ereport, *detector, *resource;
63660Sstevel@tonic-gate 	errorq_elem_t *eqep;
63670Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
63680Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
63690Sstevel@tonic-gate 	int len = 0;
63700Sstevel@tonic-gate 	uint8_t  msg_type;
63710Sstevel@tonic-gate 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
63720Sstevel@tonic-gate 
63730Sstevel@tonic-gate 	if (aflt->flt_panic || panicstr) {
63740Sstevel@tonic-gate 		eqep = errorq_reserve(ereport_errorq);
63750Sstevel@tonic-gate 		if (eqep == NULL)
63760Sstevel@tonic-gate 			return;
63770Sstevel@tonic-gate 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
63780Sstevel@tonic-gate 		nva = errorq_elem_nva(ereport_errorq, eqep);
63790Sstevel@tonic-gate 	} else {
63800Sstevel@tonic-gate 		ereport = fm_nvlist_create(nva);
63810Sstevel@tonic-gate 	}
63820Sstevel@tonic-gate 
63830Sstevel@tonic-gate 	/*
	 * Create the "cpu" scheme FMRI.
63850Sstevel@tonic-gate 	 */
63860Sstevel@tonic-gate 	detector = fm_nvlist_create(nva);
63870Sstevel@tonic-gate 	resource = fm_nvlist_create(nva);
63880Sstevel@tonic-gate 	switch (cpunodes[aflt->flt_inst].implementation) {
63890Sstevel@tonic-gate 	case CHEETAH_IMPL:
63900Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIII;
63910Sstevel@tonic-gate 		break;
63920Sstevel@tonic-gate 	case CHEETAH_PLUS_IMPL:
63930Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIplus;
63940Sstevel@tonic-gate 		break;
63950Sstevel@tonic-gate 	case JALAPENO_IMPL:
63960Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIi;
63970Sstevel@tonic-gate 		break;
63980Sstevel@tonic-gate 	case SERRANO_IMPL:
63990Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
64000Sstevel@tonic-gate 		break;
64010Sstevel@tonic-gate 	case JAGUAR_IMPL:
64020Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIV;
64030Sstevel@tonic-gate 		break;
64040Sstevel@tonic-gate 	case PANTHER_IMPL:
64050Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIVplus;
64060Sstevel@tonic-gate 		break;
64070Sstevel@tonic-gate 	default:
64080Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
64090Sstevel@tonic-gate 		break;
64100Sstevel@tonic-gate 	}
64110Sstevel@tonic-gate 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
64120Sstevel@tonic-gate 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
64130Sstevel@tonic-gate 	    cpunodes[aflt->flt_inst].device_id);
64140Sstevel@tonic-gate 
64150Sstevel@tonic-gate 	/*
64160Sstevel@tonic-gate 	 * Encode all the common data into the ereport.
64170Sstevel@tonic-gate 	 */
64180Sstevel@tonic-gate 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
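	/*
	 * Annotation (not in the original source): buf becomes a dotted
	 * class string of the form <cpu-scheme>.<cpu_type>.<error-class>;
	 * the exact text depends on the FM_ERROR_CPU and FM_EREPORT_CPU_*
	 * string constants.
	 */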
64200Sstevel@tonic-gate 
64210Sstevel@tonic-gate 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
64220Sstevel@tonic-gate 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
64230Sstevel@tonic-gate 	    detector, NULL);
64240Sstevel@tonic-gate 
64250Sstevel@tonic-gate 	/*
64260Sstevel@tonic-gate 	 * Encode the error specific data that was saved in
64270Sstevel@tonic-gate 	 * the async_flt structure into the ereport.
64280Sstevel@tonic-gate 	 */
64290Sstevel@tonic-gate 	cpu_payload_add_aflt(aflt, ereport, resource,
64300Sstevel@tonic-gate 	    &plat_ecc_ch_flt.ecaf_afar_status,
64310Sstevel@tonic-gate 	    &plat_ecc_ch_flt.ecaf_synd_status);
64320Sstevel@tonic-gate 
64330Sstevel@tonic-gate 	if (aflt->flt_panic || panicstr) {
64340Sstevel@tonic-gate 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
64350Sstevel@tonic-gate 	} else {
64360Sstevel@tonic-gate 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
64370Sstevel@tonic-gate 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
64380Sstevel@tonic-gate 		fm_nvlist_destroy(detector, FM_NVA_FREE);
64390Sstevel@tonic-gate 		fm_nvlist_destroy(resource, FM_NVA_FREE);
64400Sstevel@tonic-gate 	}
64410Sstevel@tonic-gate 	/*
64420Sstevel@tonic-gate 	 * Send the enhanced error information (plat_ecc_error2_data_t)
 * to the SC only if it can process it.
 */
64460Sstevel@tonic-gate 	if (&plat_ecc_capability_sc_get &&
64470Sstevel@tonic-gate 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
64480Sstevel@tonic-gate 		msg_type = cpu_flt_bit_to_plat_error(aflt);
64490Sstevel@tonic-gate 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
64500Sstevel@tonic-gate 			/*
			 * If the AFAR status is not invalid, do a unum lookup.
64520Sstevel@tonic-gate 			 */
64530Sstevel@tonic-gate 			if (plat_ecc_ch_flt.ecaf_afar_status !=
64540Sstevel@tonic-gate 			    AFLT_STAT_INVALID) {
64550Sstevel@tonic-gate 				(void) cpu_get_mem_unum_aflt(
64560Sstevel@tonic-gate 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
64570Sstevel@tonic-gate 				    unum, UNUM_NAMLEN, &len);
64580Sstevel@tonic-gate 			} else {
64590Sstevel@tonic-gate 				unum[0] = '\0';
64600Sstevel@tonic-gate 			}
64610Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
64620Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
64630Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
64640Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
64650Sstevel@tonic-gate 			    ch_flt->flt_sdw_afsr_ext;
64660Sstevel@tonic-gate 
64670Sstevel@tonic-gate 			if (&plat_log_fruid_error2)
64680Sstevel@tonic-gate 				plat_log_fruid_error2(msg_type, unum, aflt,
64690Sstevel@tonic-gate 				    &plat_ecc_ch_flt);
64700Sstevel@tonic-gate 		}
64710Sstevel@tonic-gate 	}
64720Sstevel@tonic-gate }
64730Sstevel@tonic-gate 
64740Sstevel@tonic-gate void
64750Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
64760Sstevel@tonic-gate {
64770Sstevel@tonic-gate 	int status;
64780Sstevel@tonic-gate 	ddi_fm_error_t de;
64790Sstevel@tonic-gate 
64800Sstevel@tonic-gate 	bzero(&de, sizeof (ddi_fm_error_t));
64810Sstevel@tonic-gate 
64820Sstevel@tonic-gate 	de.fme_version = DDI_FME_VERSION;
64830Sstevel@tonic-gate 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
64840Sstevel@tonic-gate 	    FM_ENA_FMT1);
64850Sstevel@tonic-gate 	de.fme_flag = expected;
64860Sstevel@tonic-gate 	de.fme_bus_specific = (void *)aflt->flt_addr;
64870Sstevel@tonic-gate 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
64880Sstevel@tonic-gate 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
64890Sstevel@tonic-gate 		aflt->flt_panic = 1;
64900Sstevel@tonic-gate }
64910Sstevel@tonic-gate 
64920Sstevel@tonic-gate void
64930Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
64940Sstevel@tonic-gate     errorq_t *eqp, uint_t flag)
64950Sstevel@tonic-gate {
64960Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)payload;
64970Sstevel@tonic-gate 
64980Sstevel@tonic-gate 	aflt->flt_erpt_class = error_class;
64990Sstevel@tonic-gate 	errorq_dispatch(eqp, payload, payload_sz, flag);
65000Sstevel@tonic-gate }
65010Sstevel@tonic-gate 
65020Sstevel@tonic-gate /*
65030Sstevel@tonic-gate  * This routine may be called by the IO module, but does not do
65040Sstevel@tonic-gate  * anything in this cpu module. The SERD algorithm is handled by
 * the cpumem-diagnosis engine instead.
65060Sstevel@tonic-gate  */
65070Sstevel@tonic-gate /*ARGSUSED*/
65080Sstevel@tonic-gate void
65090Sstevel@tonic-gate cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
65100Sstevel@tonic-gate {}
65110Sstevel@tonic-gate 
65120Sstevel@tonic-gate void
65130Sstevel@tonic-gate adjust_hw_copy_limits(int ecache_size)
65140Sstevel@tonic-gate {
65150Sstevel@tonic-gate 	/*
65160Sstevel@tonic-gate 	 * Set hw copy limits.
65170Sstevel@tonic-gate 	 *
65180Sstevel@tonic-gate 	 * /etc/system will be parsed later and can override one or more
65190Sstevel@tonic-gate 	 * of these settings.
65200Sstevel@tonic-gate 	 *
65210Sstevel@tonic-gate 	 * At this time, ecache size seems only mildly relevant.
65220Sstevel@tonic-gate 	 * We seem to run into issues with the d-cache and stalls
65230Sstevel@tonic-gate 	 * we see on misses.
65240Sstevel@tonic-gate 	 *
65250Sstevel@tonic-gate 	 * Cycle measurement indicates that 2 byte aligned copies fare
65260Sstevel@tonic-gate 	 * little better than doing things with VIS at around 512 bytes.
65270Sstevel@tonic-gate 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
65280Sstevel@tonic-gate 	 * aligned is faster whenever the source and destination data
65290Sstevel@tonic-gate 	 * in cache and the total size is less than 2 Kbytes.  The 2K
65300Sstevel@tonic-gate 	 * limit seems to be driven by the 2K write cache.
65310Sstevel@tonic-gate 	 * When more than 2K of copies are done in non-VIS mode, stores
65320Sstevel@tonic-gate 	 * backup in the write cache.  In VIS mode, the write cache is
65330Sstevel@tonic-gate 	 * bypassed, allowing faster cache-line writes aligned on cache
65340Sstevel@tonic-gate 	 * boundaries.
65350Sstevel@tonic-gate 	 *
65360Sstevel@tonic-gate 	 * In addition, in non-VIS mode, there is no prefetching, so
65370Sstevel@tonic-gate 	 * for larger copies, the advantage of prefetching to avoid even
65380Sstevel@tonic-gate 	 * occasional cache misses is enough to justify using the VIS code.
65390Sstevel@tonic-gate 	 *
65400Sstevel@tonic-gate 	 * During testing, it was discovered that netbench ran 3% slower
65410Sstevel@tonic-gate 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
65420Sstevel@tonic-gate 	 * applications, data is only used once (copied to the output
65430Sstevel@tonic-gate 	 * buffer, then copied by the network device off the system).  Using
65440Sstevel@tonic-gate 	 * the VIS copy saves more L2 cache state.  Network copies are
65450Sstevel@tonic-gate 	 * around 1.3K to 1.5K in size for historical reasons.
65460Sstevel@tonic-gate 	 *
65470Sstevel@tonic-gate 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
65480Sstevel@tonic-gate 	 * aligned copy even for large caches and 8 MB ecache.  The
65490Sstevel@tonic-gate 	 * infrastructure to allow different limits for different sized
65500Sstevel@tonic-gate 	 * caches is kept to allow further tuning in later releases.
65510Sstevel@tonic-gate 	 */
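	/*
	 * For reference, a manual override could be applied from
	 * /etc/system (values in bytes; these particular numbers are
	 * illustrative only):
	 *
	 *	set hw_copy_limit_8 = 1024
	 *	set use_hw_bcopy = 0
	 *
	 * Such settings are parsed after the first pass through this
	 * function and are respected by the MP re-initialization below.
	 */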

	if (min_ecache_size == 0 && use_hw_bcopy) {
		/*
		 * First time through - should be before /etc/system
		 * is read.
		 * Could skip the checks for zero but this lets us
		 * preserve any debugger rewrites.
		 */
		if (hw_copy_limit_1 == 0) {
			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
			priv_hcl_1 = hw_copy_limit_1;
		}
		if (hw_copy_limit_2 == 0) {
			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
			priv_hcl_2 = hw_copy_limit_2;
		}
		if (hw_copy_limit_4 == 0) {
			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_4 = hw_copy_limit_4;
		}
		if (hw_copy_limit_8 == 0) {
			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_8 = hw_copy_limit_8;
		}
		min_ecache_size = ecache_size;
	} else {
		/*
		 * MP initialization.  Called *after* /etc/system has
		 * been parsed.  One CPU has already been initialized.
		 * Need to cater for /etc/system having scragged one
		 * of our values.
		 */
		if (ecache_size == min_ecache_size) {
			/*
			 * Same size ecache.  We do nothing unless we
			 * have a pessimistic ecache setting.  In that
			 * case we become more optimistic (if the cache is
			 * large enough).
			 */
			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
				/*
				 * Need to adjust hw_copy_limit* from our
				 * pessimistic uniprocessor value to a more
				 * optimistic MP value *iff* it hasn't been
				 * reset.
				 */
				if ((ecache_size > 1048576) &&
				    (priv_hcl_8 == hw_copy_limit_8)) {
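					/*
					 * The branches below are currently
					 * identical; they are kept separate
					 * per cache size to allow further
					 * tuning in later releases.
					 */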
					if (ecache_size <= 2097152)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else if (ecache_size <= 4194304)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					priv_hcl_8 = hw_copy_limit_8;
				}
			}
		} else if (ecache_size < min_ecache_size) {
			/*
			 * A smaller ecache size.  Can this even happen?
			 */
			if (priv_hcl_8 == hw_copy_limit_8) {
				/*
				 * The previous value that we set
				 * is unchanged (i.e., it hasn't been
				 * scragged by /etc/system).  Rewrite it.
				 */
				if (ecache_size <= 1048576)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 2097152)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 4194304)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else
					hw_copy_limit_8 = 10 *
					    VIS_COPY_THRESHOLD;
				priv_hcl_8 = hw_copy_limit_8;
				min_ecache_size = ecache_size;
			}
		}
	}
}

/*
 * Called from the illegal instruction trap handler to see if we can
 * attribute the trap to an fpras check.
 */
int
fpras_chktrap(struct regs *rp)
{
	int op;
	struct fpras_chkfngrp *cgp;
	uintptr_t tpc = (uintptr_t)rp->r_pc;

	if (fpras_chkfngrps == NULL)
		return (0);

	cgp = &fpras_chkfngrps[CPU->cpu_id];
	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
			break;
	}
	if (op == FPRAS_NCOPYOPS)
		return (0);

	/*
	 * This is an fpRAS failure caught through an illegal
	 * instruction; resume at the check function's trampoline.
	 */
	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
	rp->r_npc = rp->r_pc + 4;
	return (1);
}

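/*
 * Sketch of the intended call site (the real handler lives in the common
 * trap code and may differ): on an illegal-instruction trap the handler
 * can ask whether the trap PC lies within a check function, e.g.
 *
 *	if (fpras_implemented && fpras_chktrap(rp))
 *		goto cleanup;	(resumes at the fpras trampoline)
 */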
/*
 * fpras_failure is called when an fpras check detects a bad calculation
 * result or when an illegal instruction trap is attributed to an fpras
 * check.  In all cases we are still bound to the CPU.
 */
int
fpras_failure(int op, int how)
{
	int use_hw_bcopy_orig, use_hw_bzero_orig;
	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
	ch_async_flt_t ch_flt;
	struct async_flt *aflt = (struct async_flt *)&ch_flt;
	struct fpras_chkfn *sfp, *cfp;
	uint32_t *sip, *cip;
	int i;

	/*
	 * We're running on a sick CPU.  Avoid further FPU use at least for
	 * the time in which we dispatch an ereport and (if applicable) panic.
	 */
	use_hw_bcopy_orig = use_hw_bcopy;
	use_hw_bzero_orig = use_hw_bzero;
	hcl1_orig = hw_copy_limit_1;
	hcl2_orig = hw_copy_limit_2;
	hcl4_orig = hw_copy_limit_4;
	hcl8_orig = hw_copy_limit_8;
	use_hw_bcopy = use_hw_bzero = 0;
	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
	    hw_copy_limit_8 = 0;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_class = CPU_FAULT;
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_status = (how << 8) | op;
	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
	ch_flt.flt_type = CPU_FPUERR;

	/*
	 * We must panic if the copy operation had no lofault protection -
	 * i.e., don't panic for copyin, copyout, kcopy and bcopy called
	 * under on_fault, but do panic for unprotected bcopy and
	 * hwblkpagecopy.
	 */
	aflt->flt_panic = (curthread->t_lofault == NULL);

	/*
	 * XOR the source instruction block with the copied instruction
	 * block - this will show us which bit(s) are corrupted.
	 */
	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
		sip = &sfp->fpras_blk0[0];
		cip = &cfp->fpras_blk0[0];
	} else {
		sip = &sfp->fpras_blk1[0];
		cip = &cfp->fpras_blk1[0];
	}
	for (i = 0; i < 16; ++i, ++sip, ++cip)
		ch_flt.flt_fpdata[i] = *sip ^ *cip;

	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		fm_panic("FPU failure on CPU %d", CPU->cpu_id);

	/*
	 * We get here for copyin/copyout and kcopy or bcopy where the
	 * caller has used on_fault.  We will flag the error so that
	 * the process may be killed.  The trap_async_hwerr mechanism will
	 * take appropriate further action (such as a reboot, contract
	 * notification etc.).  Since we may be continuing we will
	 * restore the global hardware copy acceleration switches.
	 *
	 * When we return from this function to the copy function we want to
	 * avoid potentially bad data being used, i.e., we want the affected
	 * copy function to return an error.  The caller should therefore
	 * invoke its lofault handler (which always exists for these
	 * functions) which will return the appropriate error.
	 */
	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
	aston(curthread);

	use_hw_bcopy = use_hw_bcopy_orig;
	use_hw_bzero = use_hw_bzero_orig;
	hw_copy_limit_1 = hcl1_orig;
	hw_copy_limit_2 = hcl2_orig;
	hw_copy_limit_4 = hcl4_orig;
	hw_copy_limit_8 = hcl8_orig;

	return (1);
}

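/*
 * For orientation, a copy routine instrumented for fpras conceptually
 * ends with (pseudocode, not the real assembler sequence):
 *
 *	if (the recomputed check block does not match)
 *		return (fpras_failure(op, FPRAS_BADCALC));
 *
 * while a corruption caught as an illegal instruction arrives here via
 * fpras_chktrap() and its trampoline with how == FPRAS_BADTRAP.
 */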
#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

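/*
 * The function above follows the standard bracket for stores that may
 * overlap a user watchpoint: disable watchpoints over the range, do the
 * store, then re-enable only if they were actually active, i.e.
 *
 *	watched = watch_disable_addr(addr, len, S_WRITE);
 *	(perform the store)
 *	if (watched)
 *		watch_enable_addr(addr, len, S_WRITE);
 */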
/*
 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
 * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
 * CEEN from the EER to disable traps for further disrupting error types
 * on that cpu.  We could cross-call instead, but that has a larger
 * instruction and data footprint than cross-trapping, and the cpu is known
 * to be faulted.
 */
void
cpu_faulted_enter(struct cpu *cp)
{
	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
}

/*
 * Called when a cpu leaves the CPU_FAULTED state to return to one of
 * offline, spare, or online (by the cpu requesting this state change).
 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
 * disrupting error bits that have accumulated without trapping, then
 * we cross-trap to re-enable CEEN-controlled traps.
 */
void
cpu_faulted_exit(struct cpu *cp)
{
	ch_cpu_errors_t cpu_error_regs;

	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
		cpu_error_regs.afsr_ext = C_AFSR_EXT_CECC_ERRS;
	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
	    (uint64_t)&cpu_error_regs, 0);

	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
}

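/*
 * Hypothetical pairing in a CPU state-change path (the actual driver of
 * these calls is the machine-independent cpu code):
 *
 *	cpu_faulted_enter(cp);	(CEEN cleared; disrupting traps quiesced)
 *	...diagnosis, offlining, or repair...
 *	cpu_faulted_exit(cp);	(accumulated AFSR bits cleared, CEEN restored)
 */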
/*
 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
 * the errors in the original AFSR, 0 otherwise.
 *
 * For all procs, if the initial error was a BERR or TO, then it is possible
 * that we caused a secondary BERR or TO while logging the initial error via
 * cpu_run_bus_error_handlers().  If so, and the request was protected, a
 * panic is still not necessary; if it was not protected, aft_panic is
 * already set - so either way there is no need to set aft_panic for the
 * secondary error.
 *
 * For Cheetah and Jalapeno, if the original error was a UE which occurred on
 * a store merge, then the error handling code will call cpu_deferred_error().
 * When clear_errors() is called, it will determine that secondary errors have
 * occurred - in particular, the store merge also caused an EDU and WDU that
 * weren't discovered until this point.
 *
 * We do three checks to verify that we are in this case.  If we pass all
 * three checks, we return 1 to indicate that we should not panic.  If any
 * unexpected errors occur, we return 0.
 *
 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
 * handled in cpu_disrupting_errors().  Since this function is not even called
 * in the case we are interested in, we just return 0 for these processors.
 */
/*ARGSUSED*/
static int
cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
    uint64_t t_afar)
{
#if !defined(CHEETAH_PLUS)
	struct async_flt *aflt = (struct async_flt *)ch_flt;
#endif	/* !CHEETAH_PLUS */

	/*
	 * Was the original error a BERR or TO, and only a BERR or TO?
	 * (Multiple errors are also OK.)
	 */
	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
		/*
		 * Is the new error a BERR or TO, and only a BERR or TO?
		 * (Multiple errors are also OK.)
		 */
		if ((ch_flt->afsr_errs &
		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
			return (1);
	}

#if defined(CHEETAH_PLUS)
	return (0);
#else	/* CHEETAH_PLUS */
	/*
	 * Now look for secondary effects of a UE on cheetah/jalapeno.
	 *
	 * Check that the original error was a UE, and only a UE.  Note that
	 * the ME bit will cause us to fail this check.
	 */
	if (t_afsr_errs != C_AFSR_UE)
		return (0);

	/*
	 * Check that the secondary errors were exclusively an EDU and/or WDU.
	 */
	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
		return (0);

	/*
	 * Check that the AFARs of the original error and the secondary
	 * errors match to the 64-byte boundary.
	 */
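	/*
	 * P2ALIGN(x, 64) clears the low six bits; for example,
	 * P2ALIGN(0x1234567, 64) == 0x1234540.  Two AFARs therefore
	 * compare equal exactly when they fall within the same 64-byte
	 * cache line.
	 */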
	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
		return (0);

	/*
	 * We've passed all the checks, so it's a secondary error!
	 */
	return (1);
#endif	/* CHEETAH_PLUS */
}

/*
 * Translate the flt_bit or flt_type into an error type.  First, flt_bit
 * is checked for any valid errors.  If found, the error type is
 * returned.  If not found, the flt_type is checked for L1$ parity errors.
 */
/*ARGSUSED*/
static uint8_t
cpu_flt_bit_to_plat_error(struct async_flt *aflt)
{
#if defined(JALAPENO)
	/*
	 * Currently, logging errors to the SC is not supported on Jalapeno.
	 */
	return (PLAT_ECC_ERROR2_NONE);
#else
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;

	switch (ch_flt->flt_bit) {
	case C_AFSR_CE:
		return (PLAT_ECC_ERROR2_CE);
	case C_AFSR_UCC:
	case C_AFSR_EDC:
	case C_AFSR_WDC:
	case C_AFSR_CPC:
		return (PLAT_ECC_ERROR2_L2_CE);
	case C_AFSR_EMC:
		return (PLAT_ECC_ERROR2_EMC);
	case C_AFSR_IVC:
		return (PLAT_ECC_ERROR2_IVC);
	case C_AFSR_UE:
		return (PLAT_ECC_ERROR2_UE);
	case C_AFSR_UCU:
	case C_AFSR_EDU:
	case C_AFSR_WDU:
	case C_AFSR_CPU:
		return (PLAT_ECC_ERROR2_L2_UE);
	case C_AFSR_IVU:
		return (PLAT_ECC_ERROR2_IVU);
	case C_AFSR_TO:
		return (PLAT_ECC_ERROR2_TO);
	case C_AFSR_BERR:
		return (PLAT_ECC_ERROR2_BERR);
#if defined(CHEETAH_PLUS)
	case C_AFSR_L3_EDC:
	case C_AFSR_L3_UCC:
	case C_AFSR_L3_CPC:
	case C_AFSR_L3_WDC:
		return (PLAT_ECC_ERROR2_L3_CE);
	case C_AFSR_IMC:
		return (PLAT_ECC_ERROR2_IMC);
	case C_AFSR_TSCE:
		return (PLAT_ECC_ERROR2_L2_TSCE);
	case C_AFSR_THCE:
		return (PLAT_ECC_ERROR2_L2_THCE);
	case C_AFSR_L3_MECC:
		return (PLAT_ECC_ERROR2_L3_MECC);
	case C_AFSR_L3_THCE:
		return (PLAT_ECC_ERROR2_L3_THCE);
	case C_AFSR_L3_CPU:
	case C_AFSR_L3_EDU:
	case C_AFSR_L3_UCU:
	case C_AFSR_L3_WDU:
		return (PLAT_ECC_ERROR2_L3_UE);
	case C_AFSR_DUE:
		return (PLAT_ECC_ERROR2_DUE);
	case C_AFSR_DTO:
		return (PLAT_ECC_ERROR2_DTO);
	case C_AFSR_DBERR:
		return (PLAT_ECC_ERROR2_DBERR);
#endif	/* CHEETAH_PLUS */
	default:
		switch (ch_flt->flt_type) {
#if defined(CPU_IMP_L1_CACHE_PARITY)
		case CPU_IC_PARITY:
			return (PLAT_ECC_ERROR2_IPE);
		case CPU_DC_PARITY:
			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
				if (ch_flt->parity_data.dpe.cpl_cache ==
				    CPU_PC_PARITY) {
					return (PLAT_ECC_ERROR2_PCACHE);
				}
			}
			return (PLAT_ECC_ERROR2_DPE);
#endif /* CPU_IMP_L1_CACHE_PARITY */
		case CPU_ITLB_PARITY:
			return (PLAT_ECC_ERROR2_ITLB);
		case CPU_DTLB_PARITY:
			return (PLAT_ECC_ERROR2_DTLB);
		default:
			return (PLAT_ECC_ERROR2_NONE);
		}
	}
#endif	/* JALAPENO */
}