xref: /onnv-gate/usr/src/uts/sun4u/cpu/us3_common.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/types.h>
30*0Sstevel@tonic-gate #include <sys/systm.h>
31*0Sstevel@tonic-gate #include <sys/ddi.h>
32*0Sstevel@tonic-gate #include <sys/sysmacros.h>
33*0Sstevel@tonic-gate #include <sys/archsystm.h>
34*0Sstevel@tonic-gate #include <sys/vmsystm.h>
35*0Sstevel@tonic-gate #include <sys/machparam.h>
36*0Sstevel@tonic-gate #include <sys/machsystm.h>
37*0Sstevel@tonic-gate #include <sys/machthread.h>
38*0Sstevel@tonic-gate #include <sys/cpu.h>
39*0Sstevel@tonic-gate #include <sys/cmp.h>
40*0Sstevel@tonic-gate #include <sys/elf_SPARC.h>
41*0Sstevel@tonic-gate #include <vm/vm_dep.h>
42*0Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
43*0Sstevel@tonic-gate #include <vm/seg_kpm.h>
44*0Sstevel@tonic-gate #include <sys/cpuvar.h>
45*0Sstevel@tonic-gate #include <sys/cheetahregs.h>
46*0Sstevel@tonic-gate #include <sys/us3_module.h>
47*0Sstevel@tonic-gate #include <sys/async.h>
48*0Sstevel@tonic-gate #include <sys/cmn_err.h>
49*0Sstevel@tonic-gate #include <sys/debug.h>
50*0Sstevel@tonic-gate #include <sys/dditypes.h>
51*0Sstevel@tonic-gate #include <sys/prom_debug.h>
52*0Sstevel@tonic-gate #include <sys/prom_plat.h>
53*0Sstevel@tonic-gate #include <sys/cpu_module.h>
54*0Sstevel@tonic-gate #include <sys/sysmacros.h>
55*0Sstevel@tonic-gate #include <sys/intreg.h>
56*0Sstevel@tonic-gate #include <sys/clock.h>
57*0Sstevel@tonic-gate #include <sys/platform_module.h>
58*0Sstevel@tonic-gate #include <sys/machtrap.h>
59*0Sstevel@tonic-gate #include <sys/ontrap.h>
60*0Sstevel@tonic-gate #include <sys/panic.h>
61*0Sstevel@tonic-gate #include <sys/memlist.h>
62*0Sstevel@tonic-gate #include <sys/bootconf.h>
63*0Sstevel@tonic-gate #include <sys/ivintr.h>
64*0Sstevel@tonic-gate #include <sys/atomic.h>
65*0Sstevel@tonic-gate #include <sys/taskq.h>
66*0Sstevel@tonic-gate #include <sys/note.h>
67*0Sstevel@tonic-gate #include <sys/ndifm.h>
68*0Sstevel@tonic-gate #include <sys/ddifm.h>
69*0Sstevel@tonic-gate #include <sys/fm/protocol.h>
70*0Sstevel@tonic-gate #include <sys/fm/util.h>
71*0Sstevel@tonic-gate #include <sys/fm/cpu/UltraSPARC-III.h>
72*0Sstevel@tonic-gate #include <sys/fpras_impl.h>
73*0Sstevel@tonic-gate #include <sys/dtrace.h>
74*0Sstevel@tonic-gate #include <sys/watchpoint.h>
75*0Sstevel@tonic-gate #include <sys/plat_ecc_unum.h>
76*0Sstevel@tonic-gate #include <sys/cyclic.h>
77*0Sstevel@tonic-gate #include <sys/errorq.h>
78*0Sstevel@tonic-gate #include <sys/errclassify.h>
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
81*0Sstevel@tonic-gate #include <sys/xc_impl.h>
82*0Sstevel@tonic-gate #endif	/* CHEETAHPLUS_ERRATUM_25 */
83*0Sstevel@tonic-gate 
84*0Sstevel@tonic-gate /*
85*0Sstevel@tonic-gate  * Note that 'Cheetah PRM' refers to:
86*0Sstevel@tonic-gate  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87*0Sstevel@tonic-gate  */
88*0Sstevel@tonic-gate 
89*0Sstevel@tonic-gate /*
90*0Sstevel@tonic-gate  * Per CPU pointers to physical address of TL>0 logout data areas.
91*0Sstevel@tonic-gate  * These pointers have to be in the kernel nucleus to avoid MMU
92*0Sstevel@tonic-gate  * misses.
93*0Sstevel@tonic-gate  */
94*0Sstevel@tonic-gate uint64_t ch_err_tl1_paddrs[NCPU];
95*0Sstevel@tonic-gate 
96*0Sstevel@tonic-gate /*
97*0Sstevel@tonic-gate  * One statically allocated structure to use during startup/DR
98*0Sstevel@tonic-gate  * to prevent unnecessary panics.
99*0Sstevel@tonic-gate  */
100*0Sstevel@tonic-gate ch_err_tl1_data_t ch_err_tl1_data;
101*0Sstevel@tonic-gate 
102*0Sstevel@tonic-gate /*
103*0Sstevel@tonic-gate  * Per CPU pending error at TL>0, used by level15 softint handler
104*0Sstevel@tonic-gate  */
105*0Sstevel@tonic-gate uchar_t ch_err_tl1_pending[NCPU];
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate /*
108*0Sstevel@tonic-gate  * For deferred CE re-enable after trap.
109*0Sstevel@tonic-gate  */
110*0Sstevel@tonic-gate taskq_t		*ch_check_ce_tq;
111*0Sstevel@tonic-gate 
112*0Sstevel@tonic-gate /*
113*0Sstevel@tonic-gate  * Internal functions.
114*0Sstevel@tonic-gate  */
115*0Sstevel@tonic-gate static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116*0Sstevel@tonic-gate static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117*0Sstevel@tonic-gate static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118*0Sstevel@tonic-gate     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119*0Sstevel@tonic-gate static int clear_ecc(struct async_flt *ecc);
120*0Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
121*0Sstevel@tonic-gate static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122*0Sstevel@tonic-gate #endif
123*0Sstevel@tonic-gate static int cpu_ecache_set_size(struct cpu *cp);
124*0Sstevel@tonic-gate static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125*0Sstevel@tonic-gate static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126*0Sstevel@tonic-gate static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127*0Sstevel@tonic-gate static int cpu_ectag_pa_to_subblk_state(int cachesize,
128*0Sstevel@tonic-gate 				uint64_t subaddr, uint64_t tag);
129*0Sstevel@tonic-gate static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130*0Sstevel@tonic-gate static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131*0Sstevel@tonic-gate static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132*0Sstevel@tonic-gate static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133*0Sstevel@tonic-gate static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134*0Sstevel@tonic-gate static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135*0Sstevel@tonic-gate static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136*0Sstevel@tonic-gate static void cpu_scrubphys(struct async_flt *aflt);
137*0Sstevel@tonic-gate static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138*0Sstevel@tonic-gate     int *, int *);
139*0Sstevel@tonic-gate static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140*0Sstevel@tonic-gate static void cpu_ereport_init(struct async_flt *aflt);
141*0Sstevel@tonic-gate static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142*0Sstevel@tonic-gate static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143*0Sstevel@tonic-gate static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144*0Sstevel@tonic-gate     ch_cpu_logout_t *clop);
145*0Sstevel@tonic-gate static int cpu_ce_delayed_ec_logout(uint64_t);
146*0Sstevel@tonic-gate static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147*0Sstevel@tonic-gate 
148*0Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
149*0Sstevel@tonic-gate static int mondo_recover_proc(uint16_t, int);
150*0Sstevel@tonic-gate static void cheetah_nudge_init(void);
151*0Sstevel@tonic-gate static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152*0Sstevel@tonic-gate     cyc_time_t *when);
153*0Sstevel@tonic-gate static void cheetah_nudge_buddy(void);
154*0Sstevel@tonic-gate #endif	/* CHEETAHPLUS_ERRATUM_25 */
155*0Sstevel@tonic-gate 
156*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
157*0Sstevel@tonic-gate static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158*0Sstevel@tonic-gate static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159*0Sstevel@tonic-gate static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160*0Sstevel@tonic-gate     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161*0Sstevel@tonic-gate static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162*0Sstevel@tonic-gate static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163*0Sstevel@tonic-gate static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164*0Sstevel@tonic-gate static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165*0Sstevel@tonic-gate static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166*0Sstevel@tonic-gate static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170*0Sstevel@tonic-gate     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171*0Sstevel@tonic-gate     int *segsp, int *banksp, int *mcidp);
172*0Sstevel@tonic-gate 
173*0Sstevel@tonic-gate /*
174*0Sstevel@tonic-gate  * This table is used to determine which bit(s) is(are) bad when an ECC
175*0Sstevel@tonic-gate  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
176*0Sstevel@tonic-gate  * of this array have the following semantics:
177*0Sstevel@tonic-gate  *
178*0Sstevel@tonic-gate  *      00-127  The number of the bad bit, when only one bit is bad.
179*0Sstevel@tonic-gate  *      128     ECC bit C0 is bad.
180*0Sstevel@tonic-gate  *      129     ECC bit C1 is bad.
181*0Sstevel@tonic-gate  *      130     ECC bit C2 is bad.
182*0Sstevel@tonic-gate  *      131     ECC bit C3 is bad.
183*0Sstevel@tonic-gate  *      132     ECC bit C4 is bad.
184*0Sstevel@tonic-gate  *      133     ECC bit C5 is bad.
185*0Sstevel@tonic-gate  *      134     ECC bit C6 is bad.
186*0Sstevel@tonic-gate  *      135     ECC bit C7 is bad.
187*0Sstevel@tonic-gate  *      136     ECC bit C8 is bad.
188*0Sstevel@tonic-gate  *	137-143 reserved for Mtag Data and ECC.
189*0Sstevel@tonic-gate  *      144(M2) Two bits are bad within a nibble.
190*0Sstevel@tonic-gate  *      145(M3) Three bits are bad within a nibble.
191*0Sstevel@tonic-gate  *      146(M4) Four bits are bad within a nibble.
192*0Sstevel@tonic-gate  *      147(M)  Multiple bits (5 or more) are bad.
193*0Sstevel@tonic-gate  *      148     NO bits are bad.
194*0Sstevel@tonic-gate  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
195*0Sstevel@tonic-gate  */
196*0Sstevel@tonic-gate 
/*
 * Syndrome codes stored in the syndrome lookup tables.  Values 0-127 are
 * used directly as data bit numbers, so the symbolic codes start at 128.
 */
197*0Sstevel@tonic-gate #define	C0	128	/* ECC bit C0 is bad */
198*0Sstevel@tonic-gate #define	C1	129	/* ECC bit C1 is bad */
199*0Sstevel@tonic-gate #define	C2	130	/* ECC bit C2 is bad */
200*0Sstevel@tonic-gate #define	C3	131	/* ECC bit C3 is bad */
201*0Sstevel@tonic-gate #define	C4	132	/* ECC bit C4 is bad */
202*0Sstevel@tonic-gate #define	C5	133	/* ECC bit C5 is bad */
203*0Sstevel@tonic-gate #define	C6	134	/* ECC bit C6 is bad */
204*0Sstevel@tonic-gate #define	C7	135	/* ECC bit C7 is bad */
205*0Sstevel@tonic-gate #define	C8	136	/* ECC bit C8 is bad */
206*0Sstevel@tonic-gate #define	MT0	137	/* Mtag Data bit 0 */
207*0Sstevel@tonic-gate #define	MT1	138	/* Mtag Data bit 1 */
208*0Sstevel@tonic-gate #define	MT2	139	/* Mtag Data bit 2 */
209*0Sstevel@tonic-gate #define	MTC0	140	/* Mtag Check bit 0 */
210*0Sstevel@tonic-gate #define	MTC1	141	/* Mtag Check bit 1 */
211*0Sstevel@tonic-gate #define	MTC2	142	/* Mtag Check bit 2 */
212*0Sstevel@tonic-gate #define	MTC3	143	/* Mtag Check bit 3 */
213*0Sstevel@tonic-gate #define	M2	144	/* two bits are bad within a nibble */
214*0Sstevel@tonic-gate #define	M3	145	/* three bits are bad within a nibble */
215*0Sstevel@tonic-gate #define	M4	146	/* four bits are bad within a nibble */
216*0Sstevel@tonic-gate #define	M	147	/* multiple bits (5 or more) are bad */
217*0Sstevel@tonic-gate #define	NA	148	/* no bits are bad */
218*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
219*0Sstevel@tonic-gate #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220*0Sstevel@tonic-gate #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221*0Sstevel@tonic-gate #define	SLAST	S003MEM	/* last special syndrome */
222*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
223*0Sstevel@tonic-gate #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224*0Sstevel@tonic-gate #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225*0Sstevel@tonic-gate #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226*0Sstevel@tonic-gate #define	SLAST	S11C	/* last special syndrome */
227*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
228*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
229*0Sstevel@tonic-gate #define	BPAR0	152	/* syndrome 152 through 167 for bus parity */
230*0Sstevel@tonic-gate #define	BPAR15	167	/* last bus parity syndrome code */
231*0Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
232*0Sstevel@tonic-gate 
/*
 * ECC syndrome -> syndrome code lookup table.
 *
 * Laid out as 32 rows of 16 entries (512 entries, one per 9-bit syndrome
 * value); the row starting at index 0x100 begins with C8.  Two rows are
 * selected at compile time: for JALAPENO/SERRANO the entries at syndromes
 * 0x071 and 0x11c are M2, otherwise they are the special codes S071 and
 * S11C.  Syndrome 0x003 maps to S003 in both configurations.
 */
233*0Sstevel@tonic-gate static uint8_t ecc_syndrome_tab[] =
234*0Sstevel@tonic-gate {
235*0Sstevel@tonic-gate NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236*0Sstevel@tonic-gate C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237*0Sstevel@tonic-gate C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238*0Sstevel@tonic-gate M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239*0Sstevel@tonic-gate C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240*0Sstevel@tonic-gate M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241*0Sstevel@tonic-gate M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
243*0Sstevel@tonic-gate 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244*0Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
245*0Sstevel@tonic-gate 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246*0Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
247*0Sstevel@tonic-gate C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248*0Sstevel@tonic-gate M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249*0Sstevel@tonic-gate M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250*0Sstevel@tonic-gate 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251*0Sstevel@tonic-gate M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252*0Sstevel@tonic-gate 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253*0Sstevel@tonic-gate 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254*0Sstevel@tonic-gate M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255*0Sstevel@tonic-gate C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
257*0Sstevel@tonic-gate M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258*0Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
259*0Sstevel@tonic-gate M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260*0Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
261*0Sstevel@tonic-gate M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262*0Sstevel@tonic-gate 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263*0Sstevel@tonic-gate M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264*0Sstevel@tonic-gate 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265*0Sstevel@tonic-gate 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266*0Sstevel@tonic-gate M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267*0Sstevel@tonic-gate M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268*0Sstevel@tonic-gate 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269*0Sstevel@tonic-gate 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270*0Sstevel@tonic-gate M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271*0Sstevel@tonic-gate 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272*0Sstevel@tonic-gate M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273*0Sstevel@tonic-gate M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274*0Sstevel@tonic-gate 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275*0Sstevel@tonic-gate };
276*0Sstevel@tonic-gate 
/* Number of entries in ecc_syndrome_tab (each entry is one uint8_t). */
277*0Sstevel@tonic-gate #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate #if !(defined(JALAPENO) || defined(SERRANO))
280*0Sstevel@tonic-gate /*
281*0Sstevel@tonic-gate  * This table is used to determine which bit(s) is(are) bad when a Mtag
282*0Sstevel@tonic-gate  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
283*0Sstevel@tonic-gate  * of this array have the following semantics:
284*0Sstevel@tonic-gate  *
285*0Sstevel@tonic-gate  *      -1	Invalid mtag syndrome.
286*0Sstevel@tonic-gate  *      137     Mtag Data 0 is bad.
287*0Sstevel@tonic-gate  *      138     Mtag Data 1 is bad.
288*0Sstevel@tonic-gate  *      139     Mtag Data 2 is bad.
289*0Sstevel@tonic-gate  *      140     Mtag ECC 0 is bad.
290*0Sstevel@tonic-gate  *      141     Mtag ECC 1 is bad.
291*0Sstevel@tonic-gate  *      142     Mtag ECC 2 is bad.
292*0Sstevel@tonic-gate  *      143     Mtag ECC 3 is bad.
293*0Sstevel@tonic-gate  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
294*0Sstevel@tonic-gate  */
/*
 * The table has 16 entries, one per 4-bit syndrome value.
 * NOTE(review): no entry below is actually -1; slots that do not name a
 * single Mtag data/check bit hold NA (syndrome 0) or M2 instead.  Confirm
 * whether the "-1" row in the comment above is stale.
 */
295*0Sstevel@tonic-gate short mtag_syndrome_tab[] =
296*0Sstevel@tonic-gate {
297*0Sstevel@tonic-gate NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298*0Sstevel@tonic-gate };
299*0Sstevel@tonic-gate 
/* Number of entries in mtag_syndrome_tab (each entry is one short). */
300*0Sstevel@tonic-gate #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301*0Sstevel@tonic-gate 
302*0Sstevel@tonic-gate #else /* !(JALAPENO || SERRANO) */
303*0Sstevel@tonic-gate 
/*
 * No Mtag table is built for JALAPENO/SERRANO.  The value 16 presumably
 * matches the number of bus parity syndrome codes (BPAR0..BPAR15) -- TODO
 * confirm against the users of BSYND_TBL_SIZE.
 */
304*0Sstevel@tonic-gate #define	BSYND_TBL_SIZE	16
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate #endif /* !(JALAPENO || SERRANO) */
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate /*
309*0Sstevel@tonic-gate  * CE initial classification and subsequent action lookup table
310*0Sstevel@tonic-gate  */
311*0Sstevel@tonic-gate static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312*0Sstevel@tonic-gate static int ce_disp_inited;
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate /*
315*0Sstevel@tonic-gate  * Set to disable leaky and partner check for memory correctables
316*0Sstevel@tonic-gate  */
317*0Sstevel@tonic-gate int ce_xdiag_off;
318*0Sstevel@tonic-gate 
319*0Sstevel@tonic-gate /*
320*0Sstevel@tonic-gate  * The following are not incremented atomically so are indicative only
321*0Sstevel@tonic-gate  */
322*0Sstevel@tonic-gate static int ce_xdiag_drops;
323*0Sstevel@tonic-gate static int ce_xdiag_lkydrops;
324*0Sstevel@tonic-gate static int ce_xdiag_ptnrdrops;
325*0Sstevel@tonic-gate static int ce_xdiag_bad;
326*0Sstevel@tonic-gate 
327*0Sstevel@tonic-gate /*
328*0Sstevel@tonic-gate  * CE leaky check callback structure
329*0Sstevel@tonic-gate  */
/*
 * Bundles the three pointers handed to the CE leaky check callback.
 * NOTE(review): ownership and lifetime of the pointed-to objects are not
 * visible in this part of the file.
 */
330*0Sstevel@tonic-gate typedef struct {
331*0Sstevel@tonic-gate 	struct async_flt *lkycb_aflt;	/* async fault record; presumably the CE being rechecked */
332*0Sstevel@tonic-gate 	errorq_t *lkycb_eqp;		/* error queue handle */
333*0Sstevel@tonic-gate 	errorq_elem_t *lkycb_eqep;	/* error queue element; presumably from lkycb_eqp -- confirm */
334*0Sstevel@tonic-gate } ce_lkychk_cb_t;
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate /*
337*0Sstevel@tonic-gate  * defines for various ecache_flush_flag's
338*0Sstevel@tonic-gate  */
339*0Sstevel@tonic-gate #define	ECACHE_FLUSH_LINE	1
340*0Sstevel@tonic-gate #define	ECACHE_FLUSH_ALL	2
341*0Sstevel@tonic-gate 
342*0Sstevel@tonic-gate /*
343*0Sstevel@tonic-gate  * STICK sync
344*0Sstevel@tonic-gate  */
345*0Sstevel@tonic-gate #define	STICK_ITERATION 10
346*0Sstevel@tonic-gate #define	MAX_TSKEW	1
347*0Sstevel@tonic-gate #define	EV_A_START	0
348*0Sstevel@tonic-gate #define	EV_A_END	1
349*0Sstevel@tonic-gate #define	EV_B_START	2
350*0Sstevel@tonic-gate #define	EV_B_END	3
351*0Sstevel@tonic-gate #define	EVENTS		4
352*0Sstevel@tonic-gate 
353*0Sstevel@tonic-gate static int64_t stick_iter = STICK_ITERATION;
354*0Sstevel@tonic-gate static int64_t stick_tsk = MAX_TSKEW;
355*0Sstevel@tonic-gate 
/*
 * Command values for the STICK sync code; this is the type of the
 * volatile stick_sync_cmd variable, which starts out as EVENT_NULL.
 * NOTE(review): the master/slave handshake that uses these values is not
 * visible in this part of the file; the per-value notes are inferred from
 * the names only.
 */
356*0Sstevel@tonic-gate typedef enum {
357*0Sstevel@tonic-gate 	EVENT_NULL = 0,		/* initial value of stick_sync_cmd */
358*0Sstevel@tonic-gate 	SLAVE_START,		/* presumably: slave side begins */
359*0Sstevel@tonic-gate 	SLAVE_CONT,		/* presumably: slave side continues */
360*0Sstevel@tonic-gate 	MASTER_START		/* presumably: master side begins */
361*0Sstevel@tonic-gate } event_cmd_t;
362*0Sstevel@tonic-gate 
363*0Sstevel@tonic-gate static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364*0Sstevel@tonic-gate static int64_t timestamp[EVENTS];
365*0Sstevel@tonic-gate static volatile int slave_done;
366*0Sstevel@tonic-gate 
/*
 * DEBUG-only STICK sync statistics: one ss_t per CPU slot (the array is
 * sized NCPU), each holding DSYNC_ATTEMPTS 64-bit values.
 * NOTE(review): presumably skew_val[] records the skew seen on successive
 * sync attempts -- the code that fills it is not visible here.
 */
367*0Sstevel@tonic-gate #ifdef DEBUG
368*0Sstevel@tonic-gate #define	DSYNC_ATTEMPTS 64
369*0Sstevel@tonic-gate typedef struct {
370*0Sstevel@tonic-gate 	int64_t	skew_val[DSYNC_ATTEMPTS];
371*0Sstevel@tonic-gate } ss_t;
372*0Sstevel@tonic-gate 
373*0Sstevel@tonic-gate ss_t stick_sync_stats[NCPU];
374*0Sstevel@tonic-gate #endif /* DEBUG */
375*0Sstevel@tonic-gate 
376*0Sstevel@tonic-gate /*
377*0Sstevel@tonic-gate  * Maximum number of contexts for Cheetah.
378*0Sstevel@tonic-gate  */
379*0Sstevel@tonic-gate #define	MAX_NCTXS	(1 << 13)
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate /* Will be set !NULL for Cheetah+ and derivatives. */
382*0Sstevel@tonic-gate uchar_t *ctx_pgsz_array = NULL;
383*0Sstevel@tonic-gate #if defined(CPU_IMP_DUAL_PAGESIZE)
384*0Sstevel@tonic-gate static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385*0Sstevel@tonic-gate uint_t disable_dual_pgsz = 0;
386*0Sstevel@tonic-gate #endif	/* CPU_IMP_DUAL_PAGESIZE */
387*0Sstevel@tonic-gate 
388*0Sstevel@tonic-gate /*
389*0Sstevel@tonic-gate  * Save the cache bootup state for use when internal
390*0Sstevel@tonic-gate  * caches are to be re-enabled after an error occurs.
391*0Sstevel@tonic-gate  */
392*0Sstevel@tonic-gate uint64_t cache_boot_state;
393*0Sstevel@tonic-gate 
394*0Sstevel@tonic-gate /*
395*0Sstevel@tonic-gate  * PA[22:0] represent Displacement in Safari configuration space.
396*0Sstevel@tonic-gate  */
397*0Sstevel@tonic-gate uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398*0Sstevel@tonic-gate 
/*
 * Table of E-clock settings, built from the JBUS_* constants on
 * JALAPENO/SERRANO and from the SAFARI_* constants otherwise.  Each entry
 * pairs a *_DIV constant with the matching *_ECLK_n constant for the
 * 1, 2 and 32 settings; the list ends with an all-zero entry.
 * NOTE(review): the field names of bus_config_eclk_t are declared
 * elsewhere; presumably {divisor, config register bits} -- confirm.
 */
399*0Sstevel@tonic-gate bus_config_eclk_t bus_config_eclk[] = {
400*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
401*0Sstevel@tonic-gate 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402*0Sstevel@tonic-gate 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403*0Sstevel@tonic-gate 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
405*0Sstevel@tonic-gate 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406*0Sstevel@tonic-gate 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407*0Sstevel@tonic-gate 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
409*0Sstevel@tonic-gate 	{0, 0}	/* terminating entry */
410*0Sstevel@tonic-gate };
411*0Sstevel@tonic-gate 
412*0Sstevel@tonic-gate /*
413*0Sstevel@tonic-gate  * Interval for deferred CEEN reenable
414*0Sstevel@tonic-gate  */
415*0Sstevel@tonic-gate int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416*0Sstevel@tonic-gate 
417*0Sstevel@tonic-gate /*
418*0Sstevel@tonic-gate  * set in /etc/system to control logging of user BERR/TO's
419*0Sstevel@tonic-gate  */
420*0Sstevel@tonic-gate int cpu_berr_to_verbose = 0;
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate /*
423*0Sstevel@tonic-gate  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424*0Sstevel@tonic-gate  */
425*0Sstevel@tonic-gate uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426*0Sstevel@tonic-gate uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427*0Sstevel@tonic-gate 
428*0Sstevel@tonic-gate /*
429*0Sstevel@tonic-gate  * Set of all offline cpus
430*0Sstevel@tonic-gate  */
431*0Sstevel@tonic-gate cpuset_t cpu_offline_set;
432*0Sstevel@tonic-gate 
433*0Sstevel@tonic-gate static void cpu_delayed_check_ce_errors(void *);
434*0Sstevel@tonic-gate static void cpu_check_ce_errors(void *);
435*0Sstevel@tonic-gate void cpu_error_ecache_flush(ch_async_flt_t *);
436*0Sstevel@tonic-gate static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437*0Sstevel@tonic-gate static void cpu_log_and_clear_ce(ch_async_flt_t *);
438*0Sstevel@tonic-gate void cpu_ce_detected(ch_cpu_errors_t *, int);
439*0Sstevel@tonic-gate 
440*0Sstevel@tonic-gate /*
441*0Sstevel@tonic-gate  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442*0Sstevel@tonic-gate  * memory refresh interval of current DIMMs (64ms).  After initial fix that
443*0Sstevel@tonic-gate  * gives at least one full refresh cycle in which the cell can leak
444*0Sstevel@tonic-gate  * (whereafter further refreshes simply reinforce any incorrect bit value).
445*0Sstevel@tonic-gate  */
446*0Sstevel@tonic-gate clock_t cpu_ce_lkychk_timeout_usec = 128000;
447*0Sstevel@tonic-gate 
448*0Sstevel@tonic-gate /*
449*0Sstevel@tonic-gate  * CE partner check partner caching period in seconds
450*0Sstevel@tonic-gate  */
451*0Sstevel@tonic-gate int cpu_ce_ptnr_cachetime_sec = 60;
452*0Sstevel@tonic-gate 
/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel.
 *
 * Copies 32 bytes starting at &ttlabel over &ttentry with bcopy() and then
 * calls flush_instr_mem() on the same 32 bytes of the destination.
 *
 * Both arguments must be lvalues (their address is taken), and ttentry is
 * evaluated twice.  The expansion is wrapped in do { } while (0) so that the
 * macro behaves as a single statement: it can safely be the body of an
 * unbraced if/else or loop, and the caller supplies the terminating
 * semicolon.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)					\
	do {								\
		bcopy((const void *)&(ttlabel), &(ttentry), 32);	\
		flush_instr_mem((caddr_t)&(ttentry), 32);		\
	} while (0)
459*0Sstevel@tonic-gate 
460*0Sstevel@tonic-gate static int min_ecache_size;
461*0Sstevel@tonic-gate static uint_t priv_hcl_1;
462*0Sstevel@tonic-gate static uint_t priv_hcl_2;
463*0Sstevel@tonic-gate static uint_t priv_hcl_4;
464*0Sstevel@tonic-gate static uint_t priv_hcl_8;
465*0Sstevel@tonic-gate 
/*
 * cpu_setup() - configure global kernel settings for this CPU module.
 *
 * Takes no arguments and returns nothing; apart from the call to
 * cpu_init_trap(), every effect is a store to a global variable.  In
 * order, it:
 *   - installs the chip-specific trap handlers via cpu_init_trap();
 *   - ORs the VAC/PTAG/IOCOHERENT bits into 'cache', sets at_flags, and
 *     saves get_dcu() & DCU_CACHE in cache_boot_state;
 *   - defaults nctxs to MAX_NCTXS when it is still 0, and sets pp_slots
 *     and the page coloring controls;
 *   - sets isa_list and cpu_hwcap_flags;
 *   - zeroes hole_start/hole_end and sets the kpm window parameters;
 *   - sets traptrace_use_stick, cpc_has_overflow_intr, delay_tlb_flush
 *     (unless disable_delay_tlb_flush is set), ctx_pgsz_array (when
 *     CPU_IMP_DUAL_PAGESIZE is compiled in and not disabled) and
 *     fpras_implemented;
 *   - enables 4M text pages and sets the disable bits for the 64K, 512K,
 *     32M and 256M text page sizes; and
 *   - populates ce_disp_table and sets ce_disp_inited.
 *
 * NOTE(review): the caller is not visible here; presumably this runs once
 * during early boot, before these globals are consumed -- confirm.
 */
466*0Sstevel@tonic-gate void
467*0Sstevel@tonic-gate cpu_setup(void)
468*0Sstevel@tonic-gate {
469*0Sstevel@tonic-gate 	extern int at_flags;
470*0Sstevel@tonic-gate 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471*0Sstevel@tonic-gate 	extern int cpc_has_overflow_intr;
472*0Sstevel@tonic-gate 	extern int disable_text_largepages;
473*0Sstevel@tonic-gate 	extern int use_text_pgsz4m;
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate 	/*
476*0Sstevel@tonic-gate 	 * Setup chip-specific trap handlers.
477*0Sstevel@tonic-gate 	 */
478*0Sstevel@tonic-gate 	cpu_init_trap();
479*0Sstevel@tonic-gate 
	/* Add to, rather than replace, whatever bits 'cache' already holds. */
480*0Sstevel@tonic-gate 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483*0Sstevel@tonic-gate 
484*0Sstevel@tonic-gate 	/*
485*0Sstevel@tonic-gate 	 * save the cache bootup state.
486*0Sstevel@tonic-gate 	 */
487*0Sstevel@tonic-gate 	cache_boot_state = get_dcu() & DCU_CACHE;
488*0Sstevel@tonic-gate 
489*0Sstevel@tonic-gate 	/*
490*0Sstevel@tonic-gate 	 * Use the maximum number of contexts available for Cheetah
491*0Sstevel@tonic-gate 	 * unless it has been tuned for debugging.
492*0Sstevel@tonic-gate 	 * We are checking against 0 here since this value can be patched
493*0Sstevel@tonic-gate 	 * while booting.  It can not be patched via /etc/system since it
494*0Sstevel@tonic-gate 	 * will be patched too late and thus cause the system to panic.
495*0Sstevel@tonic-gate 	 */
496*0Sstevel@tonic-gate 	if (nctxs == 0)
497*0Sstevel@tonic-gate 		nctxs = MAX_NCTXS;
498*0Sstevel@tonic-gate 
499*0Sstevel@tonic-gate 	/*
500*0Sstevel@tonic-gate 	 * Due to the number of entries in the fully-associative tlb
501*0Sstevel@tonic-gate 	 * this may have to be tuned lower than in spitfire.
502*0Sstevel@tonic-gate 	 */
503*0Sstevel@tonic-gate 	pp_slots = MIN(8, MAXPP_SLOTS);
504*0Sstevel@tonic-gate 
505*0Sstevel@tonic-gate 	/*
506*0Sstevel@tonic-gate 	 * Block stores do not invalidate all pages of the d$, pagecopy
507*0Sstevel@tonic-gate 	 * et. al. need virtual translations with virtual coloring taken
508*0Sstevel@tonic-gate 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509*0Sstevel@tonic-gate 	 * load side.
510*0Sstevel@tonic-gate 	 */
511*0Sstevel@tonic-gate 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512*0Sstevel@tonic-gate 
	/* Virtual coloring is only turned on when page coloring is in use. */
513*0Sstevel@tonic-gate 	if (use_page_coloring) {
514*0Sstevel@tonic-gate 		do_pg_coloring = 1;
515*0Sstevel@tonic-gate 		if (use_virtual_coloring)
516*0Sstevel@tonic-gate 			do_virtual_coloring = 1;
517*0Sstevel@tonic-gate 	}
518*0Sstevel@tonic-gate 
519*0Sstevel@tonic-gate 	isa_list =
520*0Sstevel@tonic-gate 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521*0Sstevel@tonic-gate 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522*0Sstevel@tonic-gate 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523*0Sstevel@tonic-gate 
524*0Sstevel@tonic-gate 	/*
525*0Sstevel@tonic-gate 	 * On Panther-based machines, this should
526*0Sstevel@tonic-gate 	 * also include AV_SPARC_POPC too
527*0Sstevel@tonic-gate 	 */
528*0Sstevel@tonic-gate 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529*0Sstevel@tonic-gate 
530*0Sstevel@tonic-gate 	/*
531*0Sstevel@tonic-gate 	 * On cheetah, there's no hole in the virtual address space
532*0Sstevel@tonic-gate 	 */
533*0Sstevel@tonic-gate 	hole_start = hole_end = 0;
534*0Sstevel@tonic-gate 
535*0Sstevel@tonic-gate 	/*
536*0Sstevel@tonic-gate 	 * The kpm mapping window.
537*0Sstevel@tonic-gate 	 * kpm_size:
538*0Sstevel@tonic-gate 	 *	The size of a single kpm range.
539*0Sstevel@tonic-gate 	 *	The overall size will be: kpm_size * vac_colors.
540*0Sstevel@tonic-gate 	 * kpm_vbase:
541*0Sstevel@tonic-gate 	 *	The virtual start address of the kpm range within the kernel
542*0Sstevel@tonic-gate 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543*0Sstevel@tonic-gate 	 */
544*0Sstevel@tonic-gate 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
	/* 2^43 bytes == 8TB, so the shift agrees with kpm_size above. */
545*0Sstevel@tonic-gate 	kpm_size_shift = 43;
546*0Sstevel@tonic-gate 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547*0Sstevel@tonic-gate 	kpm_smallpages = 1;
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 	/*
550*0Sstevel@tonic-gate 	 * The traptrace code uses either %tick or %stick for
551*0Sstevel@tonic-gate 	 * timestamping.  We have %stick so we can use it.
552*0Sstevel@tonic-gate 	 */
553*0Sstevel@tonic-gate 	traptrace_use_stick = 1;
554*0Sstevel@tonic-gate 
555*0Sstevel@tonic-gate 	/*
556*0Sstevel@tonic-gate 	 * Cheetah has a performance counter overflow interrupt
557*0Sstevel@tonic-gate 	 */
558*0Sstevel@tonic-gate 	cpc_has_overflow_intr = 1;
559*0Sstevel@tonic-gate 
560*0Sstevel@tonic-gate 	/*
561*0Sstevel@tonic-gate 	 * Use cheetah flush-all support
562*0Sstevel@tonic-gate 	 */
563*0Sstevel@tonic-gate 	if (!disable_delay_tlb_flush)
564*0Sstevel@tonic-gate 		delay_tlb_flush = 1;
565*0Sstevel@tonic-gate 
566*0Sstevel@tonic-gate #if defined(CPU_IMP_DUAL_PAGESIZE)
567*0Sstevel@tonic-gate 	/*
568*0Sstevel@tonic-gate 	 * Use Cheetah+ and later dual page size support.
569*0Sstevel@tonic-gate 	 */
	/* Otherwise ctx_pgsz_array keeps its initial NULL value. */
570*0Sstevel@tonic-gate 	if (!disable_dual_pgsz) {
571*0Sstevel@tonic-gate 		ctx_pgsz_array = ctx_pgsz_arr;
572*0Sstevel@tonic-gate 	}
573*0Sstevel@tonic-gate #endif	/* CPU_IMP_DUAL_PAGESIZE */
574*0Sstevel@tonic-gate 
575*0Sstevel@tonic-gate 	/*
576*0Sstevel@tonic-gate 	 * Declare that this architecture/cpu combination does fpRAS.
577*0Sstevel@tonic-gate 	 */
578*0Sstevel@tonic-gate 	fpras_implemented = 1;
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 	/*
581*0Sstevel@tonic-gate 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582*0Sstevel@tonic-gate 	 * use large pages for initialized data segments since we may not know
583*0Sstevel@tonic-gate 	 * at exec() time what should be the preferred large page size for DTLB
584*0Sstevel@tonic-gate 	 * programming.
585*0Sstevel@tonic-gate 	 */
586*0Sstevel@tonic-gate 	use_text_pgsz4m = 1;
587*0Sstevel@tonic-gate 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588*0Sstevel@tonic-gate 	    (1 << TTE32M) | (1 << TTE256M);
589*0Sstevel@tonic-gate 
590*0Sstevel@tonic-gate 	/*
591*0Sstevel@tonic-gate 	 * Setup CE lookup table
592*0Sstevel@tonic-gate 	 */
593*0Sstevel@tonic-gate 	CE_INITDISPTBL_POPULATE(ce_disp_table);
	/* Flag is set only after the table has been populated. */
594*0Sstevel@tonic-gate 	ce_disp_inited = 1;
595*0Sstevel@tonic-gate }
596*0Sstevel@tonic-gate 
597*0Sstevel@tonic-gate /*
598*0Sstevel@tonic-gate  * Called by setcpudelay
599*0Sstevel@tonic-gate  */
600*0Sstevel@tonic-gate void
601*0Sstevel@tonic-gate cpu_init_tick_freq(void)
602*0Sstevel@tonic-gate {
603*0Sstevel@tonic-gate 	/*
604*0Sstevel@tonic-gate 	 * For UltraSPARC III and beyond we want to use the
605*0Sstevel@tonic-gate 	 * system clock rate as the basis for low level timing,
606*0Sstevel@tonic-gate 	 * due to support of mixed speed CPUs and power managment.
607*0Sstevel@tonic-gate 	 */
608*0Sstevel@tonic-gate 	if (system_clock_freq == 0)
609*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610*0Sstevel@tonic-gate 
611*0Sstevel@tonic-gate 	sys_tick_freq = system_clock_freq;
612*0Sstevel@tonic-gate }
613*0Sstevel@tonic-gate 
614*0Sstevel@tonic-gate #ifdef CHEETAHPLUS_ERRATUM_25
615*0Sstevel@tonic-gate /*
616*0Sstevel@tonic-gate  * Tunables
 *
 * cheetah_bpe_off:
 *	Not referenced by the code next to these definitions.
 *	NOTE(review): check its users before changing it.
 * cheetah_sendmondo_recover:
 *	When non-zero, send_one_mondo() calls mondo_recover() on a dispatch
 *	timeout instead of panicking straight away.
 * cheetah_sendmondo_fullscan:
 *	When non-zero, mondo_recover() goes on to claim every memseg after
 *	the TSB based attempt has failed.  When zero it gives up at that
 *	point.
 * cheetah_sendmondo_recover_delay:
 *	Interval of the nudge cyclic; cheetah_nudge_onln() multiplies it by
 *	NANOSEC, so the unit is seconds.  Values below
 *	CHEETAH_LIVELOCK_MIN_DELAY are raised to that minimum there.
617*0Sstevel@tonic-gate  */
618*0Sstevel@tonic-gate int cheetah_bpe_off = 0;
619*0Sstevel@tonic-gate int cheetah_sendmondo_recover = 1;
620*0Sstevel@tonic-gate int cheetah_sendmondo_fullscan = 0;
621*0Sstevel@tonic-gate int cheetah_sendmondo_recover_delay = 5;
622*0Sstevel@tonic-gate 
/* Smallest value accepted for cheetah_sendmondo_recover_delay. */
623*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate /*
626*0Sstevel@tonic-gate  * Recovery Statistics
627*0Sstevel@tonic-gate  */
628*0Sstevel@tonic-gate typedef struct cheetah_livelock_entry	{
629*0Sstevel@tonic-gate 	int cpuid;		/* fallen cpu */
630*0Sstevel@tonic-gate 	int buddy;		/* cpu that ran recovery */
631*0Sstevel@tonic-gate 	clock_t lbolt;		/* when recovery started */
632*0Sstevel@tonic-gate 	hrtime_t recovery_time;	/* time spent in recovery */
633*0Sstevel@tonic-gate } cheetah_livelock_entry_t;
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_NENTRY	32
636*0Sstevel@tonic-gate 
637*0Sstevel@tonic-gate cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638*0Sstevel@tonic-gate int cheetah_livelock_entry_nxt;
639*0Sstevel@tonic-gate 
640*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
641*0Sstevel@tonic-gate 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
642*0Sstevel@tonic-gate 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
643*0Sstevel@tonic-gate 		cheetah_livelock_entry_nxt = 0;				\
644*0Sstevel@tonic-gate 	}								\
645*0Sstevel@tonic-gate }
646*0Sstevel@tonic-gate 
647*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
648*0Sstevel@tonic-gate 
649*0Sstevel@tonic-gate struct {
650*0Sstevel@tonic-gate 	hrtime_t hrt;		/* maximum recovery time */
651*0Sstevel@tonic-gate 	int recovery;		/* recovered */
652*0Sstevel@tonic-gate 	int full_claimed;	/* maximum pages claimed in full recovery */
653*0Sstevel@tonic-gate 	int proc_entry;		/* attempted to claim TSB */
654*0Sstevel@tonic-gate 	int proc_tsb_scan;	/* tsb scanned */
655*0Sstevel@tonic-gate 	int proc_tsb_partscan;	/* tsb partially scanned */
656*0Sstevel@tonic-gate 	int proc_tsb_fullscan;	/* whole tsb scanned */
657*0Sstevel@tonic-gate 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658*0Sstevel@tonic-gate 	int proc_user;		/* user thread */
659*0Sstevel@tonic-gate 	int proc_kernel;	/* kernel thread */
660*0Sstevel@tonic-gate 	int proc_onflt;		/* bad stack */
661*0Sstevel@tonic-gate 	int proc_cpu;		/* null cpu */
662*0Sstevel@tonic-gate 	int proc_thread;	/* null thread */
663*0Sstevel@tonic-gate 	int proc_proc;		/* null proc */
664*0Sstevel@tonic-gate 	int proc_as;		/* null as */
665*0Sstevel@tonic-gate 	int proc_hat;		/* null hat */
666*0Sstevel@tonic-gate 	int proc_hat_inval;	/* hat contents don't make sense */
667*0Sstevel@tonic-gate 	int proc_hat_busy;	/* hat is changing TSBs */
668*0Sstevel@tonic-gate 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669*0Sstevel@tonic-gate 	int proc_cnum_bad;	/* cnum out of range */
670*0Sstevel@tonic-gate 	int proc_cnum;		/* last cnum processed */
671*0Sstevel@tonic-gate 	tte_t proc_tte;		/* last tte processed */
672*0Sstevel@tonic-gate } cheetah_livelock_stat;
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
675*0Sstevel@tonic-gate 
676*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
677*0Sstevel@tonic-gate 	cheetah_livelock_stat.item = value
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
680*0Sstevel@tonic-gate 	if (value > cheetah_livelock_stat.item)		\
681*0Sstevel@tonic-gate 		cheetah_livelock_stat.item = value;	\
682*0Sstevel@tonic-gate }
683*0Sstevel@tonic-gate 
684*0Sstevel@tonic-gate /*
685*0Sstevel@tonic-gate  * Attempt to recover a cpu by claiming every cache line as saved
686*0Sstevel@tonic-gate  * in the TSB that the non-responsive cpu is using. Since we can't
687*0Sstevel@tonic-gate  * grab any adaptive lock, this is at best an attempt to do so. Because
688*0Sstevel@tonic-gate  * we don't grab any locks, we must operate under the protection of
689*0Sstevel@tonic-gate  * on_fault().
690*0Sstevel@tonic-gate  *
 * cpuid is the id of the non-responsive cpu.  The hat of the thread it is
 * running selects the TSBs that are scanned; the kernel TSB is always
 * scanned last.  bn selects the BUSY/NACK bit pair that is tested in the
 * value returned by getidsr() and is passed on to shipit() when the mondo
 * is sent again.
 *
691*0Sstevel@tonic-gate  * Return 1 if cpuid could be recovered, 0 if failed.
 * Recovered means that the BUSY and NACK bits for bn are both clear.
 * 0 is also returned when a structure reached from cpu[cpuid] is missing
 * or inconsistent, or when on_fault() reports a fault.
692*0Sstevel@tonic-gate  */
693*0Sstevel@tonic-gate int
694*0Sstevel@tonic-gate mondo_recover_proc(uint16_t cpuid, int bn)
695*0Sstevel@tonic-gate {
696*0Sstevel@tonic-gate 	label_t ljb;
697*0Sstevel@tonic-gate 	cpu_t *cp;
698*0Sstevel@tonic-gate 	kthread_t *t;
699*0Sstevel@tonic-gate 	proc_t *p;
700*0Sstevel@tonic-gate 	struct as *as;
701*0Sstevel@tonic-gate 	struct hat *hat;
702*0Sstevel@tonic-gate 	short  cnum;
703*0Sstevel@tonic-gate 	struct tsb_info *tsbinfop;
704*0Sstevel@tonic-gate 	struct tsbe *tsbep;
705*0Sstevel@tonic-gate 	caddr_t tsbp;
706*0Sstevel@tonic-gate 	caddr_t end_tsbp;
707*0Sstevel@tonic-gate 	uint64_t paddr;
708*0Sstevel@tonic-gate 	uint64_t idsr;
709*0Sstevel@tonic-gate 	u_longlong_t pahi, palo;
710*0Sstevel@tonic-gate 	int pages_claimed = 0;
711*0Sstevel@tonic-gate 	tte_t tsbe_tte;
712*0Sstevel@tonic-gate 	int tried_kernel_tsb = 0;
713*0Sstevel@tonic-gate 
714*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_STAT(proc_entry);
715*0Sstevel@tonic-gate 
	/*
	 * A non-zero return from on_fault() is counted as proc_onflt and
	 * handled like any other unusable structure.
	 */
716*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
717*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718*0Sstevel@tonic-gate 		goto badstruct;
719*0Sstevel@tonic-gate 	}
720*0Sstevel@tonic-gate 
	/*
	 * Walk cpu -> thread -> proc -> as -> hat, giving up at the first
	 * NULL pointer.
	 */
721*0Sstevel@tonic-gate 	if ((cp = cpu[cpuid]) == NULL) {
722*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723*0Sstevel@tonic-gate 		goto badstruct;
724*0Sstevel@tonic-gate 	}
725*0Sstevel@tonic-gate 
726*0Sstevel@tonic-gate 	if ((t = cp->cpu_thread) == NULL) {
727*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_thread);
728*0Sstevel@tonic-gate 		goto badstruct;
729*0Sstevel@tonic-gate 	}
730*0Sstevel@tonic-gate 
731*0Sstevel@tonic-gate 	if ((p = ttoproc(t)) == NULL) {
732*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_proc);
733*0Sstevel@tonic-gate 		goto badstruct;
734*0Sstevel@tonic-gate 	}
735*0Sstevel@tonic-gate 
736*0Sstevel@tonic-gate 	if ((as = p->p_as) == NULL) {
737*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_as);
738*0Sstevel@tonic-gate 		goto badstruct;
739*0Sstevel@tonic-gate 	}
740*0Sstevel@tonic-gate 
741*0Sstevel@tonic-gate 	if ((hat = as->a_hat) == NULL) {
742*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_hat);
743*0Sstevel@tonic-gate 		goto badstruct;
744*0Sstevel@tonic-gate 	}
745*0Sstevel@tonic-gate 
	/*
	 * Choose the first TSB to scan: the head of the hat's TSB list for
	 * a user hat, the kernel TSB for the kernel hat.
	 */
746*0Sstevel@tonic-gate 	if (hat != ksfmmup) {
747*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_user);
748*0Sstevel@tonic-gate 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749*0Sstevel@tonic-gate 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750*0Sstevel@tonic-gate 			goto badstruct;
751*0Sstevel@tonic-gate 		}
752*0Sstevel@tonic-gate 		tsbinfop = hat->sfmmu_tsb;
753*0Sstevel@tonic-gate 		if (tsbinfop == NULL) {
754*0Sstevel@tonic-gate 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755*0Sstevel@tonic-gate 			goto badstruct;
756*0Sstevel@tonic-gate 		}
757*0Sstevel@tonic-gate 		tsbp = tsbinfop->tsb_va;
758*0Sstevel@tonic-gate 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759*0Sstevel@tonic-gate 	} else {
760*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761*0Sstevel@tonic-gate 		tsbinfop = NULL;
762*0Sstevel@tonic-gate 		tsbp = ktsb_base;
763*0Sstevel@tonic-gate 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764*0Sstevel@tonic-gate 	}
765*0Sstevel@tonic-gate 
766*0Sstevel@tonic-gate 	/* Verify as */
767*0Sstevel@tonic-gate 	if (hat->sfmmu_as != as) {
768*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769*0Sstevel@tonic-gate 		goto badstruct;
770*0Sstevel@tonic-gate 	}
771*0Sstevel@tonic-gate 
	/* Record the context number, then reject one that is out of range. */
772*0Sstevel@tonic-gate 	cnum = hat->sfmmu_cnum;
773*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777*0Sstevel@tonic-gate 		goto badstruct;
778*0Sstevel@tonic-gate 	}
779*0Sstevel@tonic-gate 
780*0Sstevel@tonic-gate 	do {
781*0Sstevel@tonic-gate 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782*0Sstevel@tonic-gate 
783*0Sstevel@tonic-gate 		/*
784*0Sstevel@tonic-gate 		 * Skip TSBs being relocated.  This is important because
785*0Sstevel@tonic-gate 		 * we want to avoid the following deadlock scenario:
786*0Sstevel@tonic-gate 		 *
787*0Sstevel@tonic-gate 		 * 1) when we came in we set ourselves to "in recover" state.
788*0Sstevel@tonic-gate 		 * 2) when we try to touch TSB being relocated the mapping
789*0Sstevel@tonic-gate 		 *    will be in the suspended state so we'll spin waiting
790*0Sstevel@tonic-gate 		 *    for it to be unlocked.
791*0Sstevel@tonic-gate 		 * 3) when the CPU that holds the TSB mapping locked tries to
792*0Sstevel@tonic-gate 		 *    unlock it it will send a xtrap which will fail to xcall
793*0Sstevel@tonic-gate 		 *    us or the CPU we're trying to recover, and will in turn
794*0Sstevel@tonic-gate 		 *    enter the mondo code.
795*0Sstevel@tonic-gate 		 * 4) since we are still spinning on the locked mapping
796*0Sstevel@tonic-gate 		 *    no further progress will be made and the system will
797*0Sstevel@tonic-gate 		 *    inevitably hard hang.
798*0Sstevel@tonic-gate 		 *
799*0Sstevel@tonic-gate 		 * A TSB not being relocated can't begin being relocated
800*0Sstevel@tonic-gate 		 * while we're accessing it because we check
801*0Sstevel@tonic-gate 		 * sendmondo_in_recover before relocating TSBs.
802*0Sstevel@tonic-gate 		 */
803*0Sstevel@tonic-gate 		if (hat != ksfmmup &&
804*0Sstevel@tonic-gate 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805*0Sstevel@tonic-gate 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806*0Sstevel@tonic-gate 			goto next_tsbinfo;
807*0Sstevel@tonic-gate 		}
808*0Sstevel@tonic-gate 
809*0Sstevel@tonic-gate 		for (tsbep = (struct tsbe *)tsbp;
810*0Sstevel@tonic-gate 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811*0Sstevel@tonic-gate 			tsbe_tte = tsbep->tte_data;
812*0Sstevel@tonic-gate 
813*0Sstevel@tonic-gate 			if (tsbe_tte.tte_val == 0) {
814*0Sstevel@tonic-gate 				/*
815*0Sstevel@tonic-gate 				 * Invalid tte
816*0Sstevel@tonic-gate 				 */
817*0Sstevel@tonic-gate 				continue;
818*0Sstevel@tonic-gate 			}
819*0Sstevel@tonic-gate 			if (tsbe_tte.tte_se) {
820*0Sstevel@tonic-gate 				/*
821*0Sstevel@tonic-gate 				 * Don't want device registers
822*0Sstevel@tonic-gate 				 */
823*0Sstevel@tonic-gate 				continue;
824*0Sstevel@tonic-gate 			}
825*0Sstevel@tonic-gate 			if (tsbe_tte.tte_cp == 0) {
826*0Sstevel@tonic-gate 				/*
827*0Sstevel@tonic-gate 				 * Must be cached in E$
828*0Sstevel@tonic-gate 				 */
829*0Sstevel@tonic-gate 				continue;
830*0Sstevel@tonic-gate 			}
831*0Sstevel@tonic-gate 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			/*
			 * Stop scanning as soon as neither NACK nor BUSY
			 * is set for bn.
			 */
832*0Sstevel@tonic-gate 			idsr = getidsr();
833*0Sstevel@tonic-gate 			if ((idsr & (IDSR_NACK_BIT(bn) |
834*0Sstevel@tonic-gate 			    IDSR_BUSY_BIT(bn))) == 0) {
835*0Sstevel@tonic-gate 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836*0Sstevel@tonic-gate 				goto done;
837*0Sstevel@tonic-gate 			}
			/*
			 * Rebuild the physical address from the tte and
			 * claim the lines this tte covers.
			 */
838*0Sstevel@tonic-gate 			pahi = tsbe_tte.tte_pahi;
839*0Sstevel@tonic-gate 			palo = tsbe_tte.tte_palo;
840*0Sstevel@tonic-gate 			paddr = (uint64_t)((pahi << 32) |
841*0Sstevel@tonic-gate 			    (palo << MMU_PAGESHIFT));
842*0Sstevel@tonic-gate 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843*0Sstevel@tonic-gate 			    CH_ECACHE_SUBBLK_SIZE);
			/*
			 * BUSY clear at this point implies NACK was set
			 * (otherwise we left the loop above), so send the
			 * mondo again.  idsr is the value sampled before
			 * claimlines().
			 */
844*0Sstevel@tonic-gate 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845*0Sstevel@tonic-gate 				shipit(cpuid, bn);
846*0Sstevel@tonic-gate 			}
847*0Sstevel@tonic-gate 			pages_claimed++;
848*0Sstevel@tonic-gate 		}
		/*
		 * Move to the next TSB of the hat.  Once the list is
		 * exhausted, make one last pass over the kernel TSB unless
		 * that is the TSB just scanned.
		 */
849*0Sstevel@tonic-gate next_tsbinfo:
850*0Sstevel@tonic-gate 		if (tsbinfop != NULL)
851*0Sstevel@tonic-gate 			tsbinfop = tsbinfop->tsb_next;
852*0Sstevel@tonic-gate 		if (tsbinfop != NULL) {
853*0Sstevel@tonic-gate 			tsbp = tsbinfop->tsb_va;
854*0Sstevel@tonic-gate 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855*0Sstevel@tonic-gate 		} else if (tsbp == ktsb_base) {
856*0Sstevel@tonic-gate 			tried_kernel_tsb = 1;
857*0Sstevel@tonic-gate 		} else if (!tried_kernel_tsb) {
858*0Sstevel@tonic-gate 			tsbp = ktsb_base;
859*0Sstevel@tonic-gate 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860*0Sstevel@tonic-gate 			hat = ksfmmup;
861*0Sstevel@tonic-gate 			tsbinfop = NULL;
862*0Sstevel@tonic-gate 		}
863*0Sstevel@tonic-gate 	} while (tsbinfop != NULL ||
864*0Sstevel@tonic-gate 			((tsbp == ktsb_base) && !tried_kernel_tsb));
865*0Sstevel@tonic-gate 
	/*
	 * Every TSB has been scanned.  Report success only if the dispatch
	 * is no longer NACKed or BUSY.
	 */
866*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868*0Sstevel@tonic-gate 	no_fault();
869*0Sstevel@tonic-gate 	idsr = getidsr();
870*0Sstevel@tonic-gate 	if ((idsr & (IDSR_NACK_BIT(bn) |
871*0Sstevel@tonic-gate 	    IDSR_BUSY_BIT(bn))) == 0) {
872*0Sstevel@tonic-gate 		return (1);
873*0Sstevel@tonic-gate 	} else {
874*0Sstevel@tonic-gate 		return (0);
875*0Sstevel@tonic-gate 	}
876*0Sstevel@tonic-gate 
	/* The scan was cut short because the target responded. */
877*0Sstevel@tonic-gate done:
878*0Sstevel@tonic-gate 	no_fault();
879*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880*0Sstevel@tonic-gate 	return (1);
881*0Sstevel@tonic-gate 
	/* A structure was unusable or a fault was reported. */
882*0Sstevel@tonic-gate badstruct:
883*0Sstevel@tonic-gate 	no_fault();
884*0Sstevel@tonic-gate 	return (0);
885*0Sstevel@tonic-gate }
886*0Sstevel@tonic-gate 
887*0Sstevel@tonic-gate /*
888*0Sstevel@tonic-gate  * Attempt to claim ownership, temporarily, of every cache line that a
889*0Sstevel@tonic-gate  * non-responsive cpu might be using.  This might kick that cpu out of
890*0Sstevel@tonic-gate  * this state.
891*0Sstevel@tonic-gate  *
892*0Sstevel@tonic-gate  * The return value indicates to the caller if we have exhausted all recovery
893*0Sstevel@tonic-gate  * techniques. If 1 is returned, it is useless to call this function again
894*0Sstevel@tonic-gate  * even for a different target CPU.
895*0Sstevel@tonic-gate  */
896*0Sstevel@tonic-gate int
897*0Sstevel@tonic-gate mondo_recover(uint16_t cpuid, int bn)
898*0Sstevel@tonic-gate {
899*0Sstevel@tonic-gate 	struct memseg *seg;
900*0Sstevel@tonic-gate 	uint64_t begin_pa, end_pa, cur_pa;
901*0Sstevel@tonic-gate 	hrtime_t begin_hrt, end_hrt;
902*0Sstevel@tonic-gate 	int retval = 0;
903*0Sstevel@tonic-gate 	int pages_claimed = 0;
904*0Sstevel@tonic-gate 	cheetah_livelock_entry_t *histp;
905*0Sstevel@tonic-gate 	uint64_t idsr;
906*0Sstevel@tonic-gate 
907*0Sstevel@tonic-gate 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908*0Sstevel@tonic-gate 		/*
909*0Sstevel@tonic-gate 		 * Wait while recovery takes place
910*0Sstevel@tonic-gate 		 */
911*0Sstevel@tonic-gate 		while (sendmondo_in_recover) {
912*0Sstevel@tonic-gate 			drv_usecwait(1);
913*0Sstevel@tonic-gate 		}
914*0Sstevel@tonic-gate 		/*
915*0Sstevel@tonic-gate 		 * Assume we didn't claim the whole memory. If
916*0Sstevel@tonic-gate 		 * the target of this caller is not recovered,
917*0Sstevel@tonic-gate 		 * it will come back.
918*0Sstevel@tonic-gate 		 */
919*0Sstevel@tonic-gate 		return (retval);
920*0Sstevel@tonic-gate 	}
921*0Sstevel@tonic-gate 
922*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926*0Sstevel@tonic-gate 
927*0Sstevel@tonic-gate 	begin_hrt = gethrtime_waitfree();
928*0Sstevel@tonic-gate 	/*
929*0Sstevel@tonic-gate 	 * First try to claim the lines in the TSB the target
930*0Sstevel@tonic-gate 	 * may have been using.
931*0Sstevel@tonic-gate 	 */
932*0Sstevel@tonic-gate 	if (mondo_recover_proc(cpuid, bn) == 1) {
933*0Sstevel@tonic-gate 		/*
934*0Sstevel@tonic-gate 		 * Didn't claim the whole memory
935*0Sstevel@tonic-gate 		 */
936*0Sstevel@tonic-gate 		goto done;
937*0Sstevel@tonic-gate 	}
938*0Sstevel@tonic-gate 
939*0Sstevel@tonic-gate 	/*
940*0Sstevel@tonic-gate 	 * We tried using the TSB. The target is still
941*0Sstevel@tonic-gate 	 * not recovered. Check if complete memory scan is
942*0Sstevel@tonic-gate 	 * enabled.
943*0Sstevel@tonic-gate 	 */
944*0Sstevel@tonic-gate 	if (cheetah_sendmondo_fullscan == 0) {
945*0Sstevel@tonic-gate 		/*
946*0Sstevel@tonic-gate 		 * Full memory scan is disabled.
947*0Sstevel@tonic-gate 		 */
948*0Sstevel@tonic-gate 		retval = 1;
949*0Sstevel@tonic-gate 		goto done;
950*0Sstevel@tonic-gate 	}
951*0Sstevel@tonic-gate 
952*0Sstevel@tonic-gate 	/*
953*0Sstevel@tonic-gate 	 * Try claiming the whole memory.
954*0Sstevel@tonic-gate 	 */
955*0Sstevel@tonic-gate 	for (seg = memsegs; seg; seg = seg->next) {
956*0Sstevel@tonic-gate 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957*0Sstevel@tonic-gate 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958*0Sstevel@tonic-gate 		for (cur_pa = begin_pa; cur_pa < end_pa;
959*0Sstevel@tonic-gate 		    cur_pa += MMU_PAGESIZE) {
960*0Sstevel@tonic-gate 			idsr = getidsr();
961*0Sstevel@tonic-gate 			if ((idsr & (IDSR_NACK_BIT(bn) |
962*0Sstevel@tonic-gate 			    IDSR_BUSY_BIT(bn))) == 0) {
963*0Sstevel@tonic-gate 				/*
964*0Sstevel@tonic-gate 				 * Didn't claim all memory
965*0Sstevel@tonic-gate 				 */
966*0Sstevel@tonic-gate 				goto done;
967*0Sstevel@tonic-gate 			}
968*0Sstevel@tonic-gate 			claimlines(cur_pa, MMU_PAGESIZE,
969*0Sstevel@tonic-gate 			    CH_ECACHE_SUBBLK_SIZE);
970*0Sstevel@tonic-gate 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971*0Sstevel@tonic-gate 				shipit(cpuid, bn);
972*0Sstevel@tonic-gate 			}
973*0Sstevel@tonic-gate 			pages_claimed++;
974*0Sstevel@tonic-gate 		}
975*0Sstevel@tonic-gate 	}
976*0Sstevel@tonic-gate 
977*0Sstevel@tonic-gate 	/*
978*0Sstevel@tonic-gate 	 * We did all we could.
979*0Sstevel@tonic-gate 	 */
980*0Sstevel@tonic-gate 	retval = 1;
981*0Sstevel@tonic-gate 
982*0Sstevel@tonic-gate done:
983*0Sstevel@tonic-gate 	/*
984*0Sstevel@tonic-gate 	 * Update statistics
985*0Sstevel@tonic-gate 	 */
986*0Sstevel@tonic-gate 	end_hrt = gethrtime_waitfree();
987*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_STAT(recovery);
988*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990*0Sstevel@tonic-gate 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991*0Sstevel@tonic-gate 	    (end_hrt -  begin_hrt));
992*0Sstevel@tonic-gate 
993*0Sstevel@tonic-gate 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994*0Sstevel@tonic-gate 
995*0Sstevel@tonic-gate 	return (retval);
996*0Sstevel@tonic-gate }
997*0Sstevel@tonic-gate 
998*0Sstevel@tonic-gate /*
999*0Sstevel@tonic-gate  * This is called by the cyclic framework when this CPU becomes online
1000*0Sstevel@tonic-gate  */
1001*0Sstevel@tonic-gate /*ARGSUSED*/
1002*0Sstevel@tonic-gate static void
1003*0Sstevel@tonic-gate cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004*0Sstevel@tonic-gate {
1005*0Sstevel@tonic-gate 
1006*0Sstevel@tonic-gate 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007*0Sstevel@tonic-gate 	hdlr->cyh_level = CY_LOW_LEVEL;
1008*0Sstevel@tonic-gate 	hdlr->cyh_arg = NULL;
1009*0Sstevel@tonic-gate 
1010*0Sstevel@tonic-gate 	/*
1011*0Sstevel@tonic-gate 	 * Stagger the start time
1012*0Sstevel@tonic-gate 	 */
1013*0Sstevel@tonic-gate 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014*0Sstevel@tonic-gate 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015*0Sstevel@tonic-gate 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016*0Sstevel@tonic-gate 	}
1017*0Sstevel@tonic-gate 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018*0Sstevel@tonic-gate }
1019*0Sstevel@tonic-gate 
1020*0Sstevel@tonic-gate /*
1021*0Sstevel@tonic-gate  * Create a low level cyclic to send a xtrap to the next cpu online.
1022*0Sstevel@tonic-gate  * However, there's no need to have this running on a uniprocessor system.
1023*0Sstevel@tonic-gate  */
1024*0Sstevel@tonic-gate static void
1025*0Sstevel@tonic-gate cheetah_nudge_init(void)
1026*0Sstevel@tonic-gate {
1027*0Sstevel@tonic-gate 	cyc_omni_handler_t hdlr;
1028*0Sstevel@tonic-gate 
1029*0Sstevel@tonic-gate 	if (max_ncpus == 1) {
1030*0Sstevel@tonic-gate 		return;
1031*0Sstevel@tonic-gate 	}
1032*0Sstevel@tonic-gate 
1033*0Sstevel@tonic-gate 	hdlr.cyo_online = cheetah_nudge_onln;
1034*0Sstevel@tonic-gate 	hdlr.cyo_offline = NULL;
1035*0Sstevel@tonic-gate 	hdlr.cyo_arg = NULL;
1036*0Sstevel@tonic-gate 
1037*0Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
1038*0Sstevel@tonic-gate 	(void) cyclic_add_omni(&hdlr);
1039*0Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
1040*0Sstevel@tonic-gate }
1041*0Sstevel@tonic-gate 
1042*0Sstevel@tonic-gate /*
1043*0Sstevel@tonic-gate  * Cyclic handler to wake up buddy
1044*0Sstevel@tonic-gate  */
1045*0Sstevel@tonic-gate void
1046*0Sstevel@tonic-gate cheetah_nudge_buddy(void)
1047*0Sstevel@tonic-gate {
1048*0Sstevel@tonic-gate 	/*
1049*0Sstevel@tonic-gate 	 * Disable kernel preemption to protect the cpu list
1050*0Sstevel@tonic-gate 	 */
1051*0Sstevel@tonic-gate 	kpreempt_disable();
1052*0Sstevel@tonic-gate 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053*0Sstevel@tonic-gate 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054*0Sstevel@tonic-gate 		    0, 0);
1055*0Sstevel@tonic-gate 	}
1056*0Sstevel@tonic-gate 	kpreempt_enable();
1057*0Sstevel@tonic-gate }
1058*0Sstevel@tonic-gate 
1059*0Sstevel@tonic-gate #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060*0Sstevel@tonic-gate 
1061*0Sstevel@tonic-gate #ifdef SEND_MONDO_STATS
/*
 * Histograms kept only when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() records the ticks one dispatch took: a count below
 * 8192 increments x_one_stimes[ticks >> 7], any other count increments
 * x_one_ltimes[(ticks >> 13) & 0xf].
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not updated by
 * the code next to these definitions; presumably the multi-target send
 * path maintains them - confirm against their users.
 */
1062*0Sstevel@tonic-gate uint32_t x_one_stimes[64];
1063*0Sstevel@tonic-gate uint32_t x_one_ltimes[16];
1064*0Sstevel@tonic-gate uint32_t x_set_stimes[64];
1065*0Sstevel@tonic-gate uint32_t x_set_ltimes[16];
1066*0Sstevel@tonic-gate uint32_t x_set_cpus[NCPU];
1067*0Sstevel@tonic-gate uint32_t x_nack_stimes[64];
1068*0Sstevel@tonic-gate #endif
1069*0Sstevel@tonic-gate 
1070*0Sstevel@tonic-gate /*
 * Send a mondo to cpuid with shipit() and poll getidsr() until it reads
 * zero.  While the busy bit is set the function just keeps polling; any
 * other non-zero status is counted as a NACK and the mondo is sent again
 * after a one microsecond wait.
 *
 * If the dispatch has not completed within xc_tick_limit ticks the
 * function returns silently when panic_quiesce is set; otherwise it tries
 * mondo_recover() (CHEETAHPLUS_ERRATUM_25 only, while enabled and not yet
 * exhausted) and finally panics.
 *
1071*0Sstevel@tonic-gate  * Note: A version of this function is used by the debugger via the KDI,
1072*0Sstevel@tonic-gate  * and must be kept in sync with this version.  Any changes made to this
1073*0Sstevel@tonic-gate  * function to support new chips or to accommodate errata must also be included
1074*0Sstevel@tonic-gate  * in the KDI-specific version.  See us3_kdi.c.
1075*0Sstevel@tonic-gate  */
1076*0Sstevel@tonic-gate void
1077*0Sstevel@tonic-gate send_one_mondo(int cpuid)
1078*0Sstevel@tonic-gate {
1079*0Sstevel@tonic-gate 	int busy, nack;
1080*0Sstevel@tonic-gate 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081*0Sstevel@tonic-gate 	uint64_t busymask;
1082*0Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
	/* becomes non-zero once mondo_recover() reports nothing is left */
1083*0Sstevel@tonic-gate 	int recovered = 0;
1084*0Sstevel@tonic-gate #endif
1085*0Sstevel@tonic-gate 
1086*0Sstevel@tonic-gate 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087*0Sstevel@tonic-gate 	starttick = lasttick = gettick();
1088*0Sstevel@tonic-gate 	shipit(cpuid, 0);
1089*0Sstevel@tonic-gate 	endtick = starttick + xc_tick_limit;
1090*0Sstevel@tonic-gate 	busy = nack = 0;
1091*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
1092*0Sstevel@tonic-gate 	/*
1093*0Sstevel@tonic-gate 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094*0Sstevel@tonic-gate 	 * will be used for dispatching interrupt. For now, assume
1095*0Sstevel@tonic-gate 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096*0Sstevel@tonic-gate 	 * issues with respect to BUSY/NACK pair usage.
1097*0Sstevel@tonic-gate 	 */
1098*0Sstevel@tonic-gate 	busymask  = IDSR_BUSY_BIT(cpuid);
1099*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
1100*0Sstevel@tonic-gate 	busymask = IDSR_BUSY;
1101*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
1102*0Sstevel@tonic-gate 	for (;;) {
		/* A status of zero means the dispatch has completed. */
1103*0Sstevel@tonic-gate 		idsr = getidsr();
1104*0Sstevel@tonic-gate 		if (idsr == 0)
1105*0Sstevel@tonic-gate 			break;
1106*0Sstevel@tonic-gate 
1107*0Sstevel@tonic-gate 		tick = gettick();
1108*0Sstevel@tonic-gate 		/*
1109*0Sstevel@tonic-gate 		 * If there is a big jump between the current tick
1110*0Sstevel@tonic-gate 		 * count and lasttick, we have probably hit a break
1111*0Sstevel@tonic-gate 		 * point.  Adjust endtick accordingly to avoid panic.
1112*0Sstevel@tonic-gate 		 */
1113*0Sstevel@tonic-gate 		if (tick > (lasttick + xc_tick_jump_limit))
1114*0Sstevel@tonic-gate 			endtick += (tick - lasttick);
1115*0Sstevel@tonic-gate 		lasttick = tick;
1116*0Sstevel@tonic-gate 		if (tick > endtick) {
			/* Timed out; stay quiet if panic_quiesce is set. */
1117*0Sstevel@tonic-gate 			if (panic_quiesce)
1118*0Sstevel@tonic-gate 				return;
1119*0Sstevel@tonic-gate #ifdef	CHEETAHPLUS_ERRATUM_25
1120*0Sstevel@tonic-gate 			if (cheetah_sendmondo_recover && recovered == 0) {
1121*0Sstevel@tonic-gate 				if (mondo_recover(cpuid, 0)) {
1122*0Sstevel@tonic-gate 					/*
1123*0Sstevel@tonic-gate 					 * We claimed the whole memory or
1124*0Sstevel@tonic-gate 					 * full scan is disabled.
1125*0Sstevel@tonic-gate 					 */
1126*0Sstevel@tonic-gate 					recovered++;
1127*0Sstevel@tonic-gate 				}
				/* Restart the timeout window. */
1128*0Sstevel@tonic-gate 				tick = gettick();
1129*0Sstevel@tonic-gate 				endtick = tick + xc_tick_limit;
1130*0Sstevel@tonic-gate 				lasttick = tick;
1131*0Sstevel@tonic-gate 				/*
1132*0Sstevel@tonic-gate 				 * Recheck idsr
1133*0Sstevel@tonic-gate 				 */
1134*0Sstevel@tonic-gate 				continue;
1135*0Sstevel@tonic-gate 			} else
1136*0Sstevel@tonic-gate #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137*0Sstevel@tonic-gate 			{
1138*0Sstevel@tonic-gate 				cmn_err(CE_PANIC, "send mondo timeout "
1139*0Sstevel@tonic-gate 				    "(target 0x%x) [%d NACK %d BUSY]",
1140*0Sstevel@tonic-gate 				    cpuid, nack, busy);
1141*0Sstevel@tonic-gate 			}
1142*0Sstevel@tonic-gate 		}
1143*0Sstevel@tonic-gate 
		/*
		 * Still busy: poll again.  Otherwise treat the status as a
		 * NACK: wait briefly, send the mondo again and restart the
		 * busy count.
		 */
1144*0Sstevel@tonic-gate 		if (idsr & busymask) {
1145*0Sstevel@tonic-gate 			busy++;
1146*0Sstevel@tonic-gate 			continue;
1147*0Sstevel@tonic-gate 		}
1148*0Sstevel@tonic-gate 		drv_usecwait(1);
1149*0Sstevel@tonic-gate 		shipit(cpuid, 0);
1150*0Sstevel@tonic-gate 		nack++;
1151*0Sstevel@tonic-gate 		busy = 0;
1152*0Sstevel@tonic-gate 	}
1153*0Sstevel@tonic-gate #ifdef SEND_MONDO_STATS
	/* Record how many ticks this dispatch took. */
1154*0Sstevel@tonic-gate 	{
1155*0Sstevel@tonic-gate 		int n = gettick() - starttick;
1156*0Sstevel@tonic-gate 		if (n < 8192)
1157*0Sstevel@tonic-gate 			x_one_stimes[n >> 7]++;
1158*0Sstevel@tonic-gate 		else
1159*0Sstevel@tonic-gate 			x_one_ltimes[(n >> 13) & 0xf]++;
1160*0Sstevel@tonic-gate 	}
1161*0Sstevel@tonic-gate #endif
1162*0Sstevel@tonic-gate }
1163*0Sstevel@tonic-gate 
/*
 * The body is empty: calling this function has no effect.
 */
void
syncfpu(void)
{
	/* nothing to do */
}
1168*0Sstevel@tonic-gate 
1169*0Sstevel@tonic-gate /*
1170*0Sstevel@tonic-gate  * Return processor specific async error structure
1171*0Sstevel@tonic-gate  * size used.
1172*0Sstevel@tonic-gate  */
1173*0Sstevel@tonic-gate int
1174*0Sstevel@tonic-gate cpu_aflt_size(void)
1175*0Sstevel@tonic-gate {
1176*0Sstevel@tonic-gate 	return (sizeof (ch_async_flt_t));
1177*0Sstevel@tonic-gate }
1178*0Sstevel@tonic-gate 
1179*0Sstevel@tonic-gate /*
1180*0Sstevel@tonic-gate  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181*0Sstevel@tonic-gate  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182*0Sstevel@tonic-gate  * flush the error that caused the UCU/UCC, then again here at the end to
1183*0Sstevel@tonic-gate  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184*0Sstevel@tonic-gate  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185*0Sstevel@tonic-gate  * another Fast ECC trap.
1186*0Sstevel@tonic-gate  *
1187*0Sstevel@tonic-gate  * Cheetah+ also handles: TSCE: No additional processing required.
1188*0Sstevel@tonic-gate  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189*0Sstevel@tonic-gate  *
1190*0Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
1191*0Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
1192*0Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
1193*0Sstevel@tonic-gate  */
1194*0Sstevel@tonic-gate /*ARGSUSED*/
1195*0Sstevel@tonic-gate void
1196*0Sstevel@tonic-gate cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197*0Sstevel@tonic-gate {
1198*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
1199*0Sstevel@tonic-gate 	uint64_t ceen;
1200*0Sstevel@tonic-gate 
1201*0Sstevel@tonic-gate 	/*
1202*0Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
1203*0Sstevel@tonic-gate 	 * pointer, then we will have to make due without any detailed
1204*0Sstevel@tonic-gate 	 * logout information.
1205*0Sstevel@tonic-gate 	 */
1206*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
1207*0Sstevel@tonic-gate 		clop = NULL;
1208*0Sstevel@tonic-gate 		ceen = p_clo_flags & EN_REG_CEEN;
1209*0Sstevel@tonic-gate 	} else {
1210*0Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1211*0Sstevel@tonic-gate 		ceen = clop->clo_flags & EN_REG_CEEN;
1212*0Sstevel@tonic-gate 	}
1213*0Sstevel@tonic-gate 
1214*0Sstevel@tonic-gate 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1215*0Sstevel@tonic-gate 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, clop);
1216*0Sstevel@tonic-gate }
1217*0Sstevel@tonic-gate 
1218*0Sstevel@tonic-gate /*
1219*0Sstevel@tonic-gate  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1220*0Sstevel@tonic-gate  * ECC at TL>0.  Need to supply either an error register pointer or a
1221*0Sstevel@tonic-gate  * cpu logout structure pointer.
1222*0Sstevel@tonic-gate  */
1223*0Sstevel@tonic-gate static void
1224*0Sstevel@tonic-gate cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1225*0Sstevel@tonic-gate     ch_cpu_logout_t *clop)
1226*0Sstevel@tonic-gate {
1227*0Sstevel@tonic-gate 	struct async_flt *aflt;
1228*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
1229*0Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1230*0Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
1231*0Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
1232*0Sstevel@tonic-gate 
1233*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1234*0Sstevel@tonic-gate 	/*
1235*0Sstevel@tonic-gate 	 * If no cpu logout data, then we will have to make due without
1236*0Sstevel@tonic-gate 	 * any detailed logout information.
1237*0Sstevel@tonic-gate 	 */
1238*0Sstevel@tonic-gate 	if (clop == NULL) {
1239*0Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1240*0Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
1241*0Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
1242*0Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
1243*0Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
1244*0Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
1245*0Sstevel@tonic-gate #if defined(SERRANO)
1246*0Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
1247*0Sstevel@tonic-gate #endif	/* SERRANO */
1248*0Sstevel@tonic-gate 	} else {
1249*0Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
1250*0Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
1251*0Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1252*0Sstevel@tonic-gate #if defined(SERRANO)
1253*0Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1254*0Sstevel@tonic-gate #endif	/* SERRANO */
1255*0Sstevel@tonic-gate 	}
1256*0Sstevel@tonic-gate 
1257*0Sstevel@tonic-gate 	/*
1258*0Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
1259*0Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1260*0Sstevel@tonic-gate 	 * sticky bits.
1261*0Sstevel@tonic-gate 	 */
1262*0Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1263*0Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
1264*0Sstevel@tonic-gate 	pr_reason[0] = '\0';
1265*0Sstevel@tonic-gate 
1266*0Sstevel@tonic-gate 	/* Setup the async fault structure */
1267*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
1268*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
1269*0Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
1270*0Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
1271*0Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
1272*0Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
1273*0Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
1274*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
1275*0Sstevel@tonic-gate 	aflt->flt_pc = tpc;
1276*0Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
1277*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
1278*0Sstevel@tonic-gate 	aflt->flt_priv = priv;
1279*0Sstevel@tonic-gate 	aflt->flt_tl = tl;
1280*0Sstevel@tonic-gate 	aflt->flt_status = ECC_F_TRAP;
1281*0Sstevel@tonic-gate 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1282*0Sstevel@tonic-gate 
1283*0Sstevel@tonic-gate 	/*
1284*0Sstevel@tonic-gate 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1285*0Sstevel@tonic-gate 	 * cmn_err messages out to the console.  The situation is a UCU (in
1286*0Sstevel@tonic-gate 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1287*0Sstevel@tonic-gate 	 * The messages for the UCU and WDU are enqueued and then pulled off
1288*0Sstevel@tonic-gate 	 * the async queue via softint and syslogd starts to process them
1289*0Sstevel@tonic-gate 	 * but doesn't get them to the console.  The UE causes a panic, but
1290*0Sstevel@tonic-gate 	 * since the UCU/WDU messages are already in transit, those aren't
1291*0Sstevel@tonic-gate 	 * on the async queue.  The hack is to check if we have a matching
1292*0Sstevel@tonic-gate 	 * WDU event for the UCU, and if it matches, we're more than likely
1293*0Sstevel@tonic-gate 	 * going to panic with a UE, unless we're under protection.  So, we
1294*0Sstevel@tonic-gate 	 * check to see if we got a matching WDU event and if we're under
1295*0Sstevel@tonic-gate 	 * protection.
1296*0Sstevel@tonic-gate 	 *
1297*0Sstevel@tonic-gate 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1298*0Sstevel@tonic-gate 	 * looks like this:
1299*0Sstevel@tonic-gate 	 *    UCU->WDU->UE
1300*0Sstevel@tonic-gate 	 * For Panther, it could look like either of these:
1301*0Sstevel@tonic-gate 	 *    UCU---->WDU->L3_WDU->UE
1302*0Sstevel@tonic-gate 	 *    L3_UCU->WDU->L3_WDU->UE
1303*0Sstevel@tonic-gate 	 */
1304*0Sstevel@tonic-gate 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1305*0Sstevel@tonic-gate 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1306*0Sstevel@tonic-gate 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1307*0Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
1308*0Sstevel@tonic-gate 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1309*0Sstevel@tonic-gate 		    (cpu_error_regs.afar == t_afar));
1310*0Sstevel@tonic-gate 		aflt->flt_panic |= ((clop == NULL) &&
1311*0Sstevel@tonic-gate 		    (t_afsr_errs & C_AFSR_WDU));
1312*0Sstevel@tonic-gate 	}
1313*0Sstevel@tonic-gate 
1314*0Sstevel@tonic-gate 	/*
1315*0Sstevel@tonic-gate 	 * Queue events on the async event queue, one event per error bit.
1316*0Sstevel@tonic-gate 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1317*0Sstevel@tonic-gate 	 * queue an event to complain.
1318*0Sstevel@tonic-gate 	 */
1319*0Sstevel@tonic-gate 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1320*0Sstevel@tonic-gate 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1321*0Sstevel@tonic-gate 		ch_flt.flt_type = CPU_INV_AFSR;
1322*0Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1323*0Sstevel@tonic-gate 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1324*0Sstevel@tonic-gate 		    aflt->flt_panic);
1325*0Sstevel@tonic-gate 	}
1326*0Sstevel@tonic-gate 
1327*0Sstevel@tonic-gate 	/*
1328*0Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
1329*0Sstevel@tonic-gate 	 */
1330*0Sstevel@tonic-gate 	if (clop) {
1331*0Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
1332*0Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1333*0Sstevel@tonic-gate 	}
1334*0Sstevel@tonic-gate 
1335*0Sstevel@tonic-gate 	/*
1336*0Sstevel@tonic-gate 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1337*0Sstevel@tonic-gate 	 * or disrupting errors have happened.  We do this because if a
1338*0Sstevel@tonic-gate 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1339*0Sstevel@tonic-gate 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1340*0Sstevel@tonic-gate 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1341*0Sstevel@tonic-gate 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1342*0Sstevel@tonic-gate 	 * deferred or disrupting error happening between checking the AFSR and
1343*0Sstevel@tonic-gate 	 * enabling NCEEN/CEEN.
1344*0Sstevel@tonic-gate 	 *
1345*0Sstevel@tonic-gate 	 * Note: CEEN reenabled only if it was on when trap taken.
1346*0Sstevel@tonic-gate 	 */
1347*0Sstevel@tonic-gate 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1348*0Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
1349*0Sstevel@tonic-gate 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1350*0Sstevel@tonic-gate 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1351*0Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1352*0Sstevel@tonic-gate 		    NULL);
1353*0Sstevel@tonic-gate 	}
1354*0Sstevel@tonic-gate 
1355*0Sstevel@tonic-gate 	/*
1356*0Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1357*0Sstevel@tonic-gate 	 * be logged as part of the panic flow.
1358*0Sstevel@tonic-gate 	 */
1359*0Sstevel@tonic-gate 	if (aflt->flt_panic)
1360*0Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
1361*0Sstevel@tonic-gate 
1362*0Sstevel@tonic-gate 	/*
1363*0Sstevel@tonic-gate 	 * Flushing the Ecache here gets the part of the trap handler that
1364*0Sstevel@tonic-gate 	 * is run at TL=1 out of the Ecache.
1365*0Sstevel@tonic-gate 	 */
1366*0Sstevel@tonic-gate 	cpu_flush_ecache();
1367*0Sstevel@tonic-gate }
1368*0Sstevel@tonic-gate 
1369*0Sstevel@tonic-gate /*
1370*0Sstevel@tonic-gate  * This is called via sys_trap from pil15_interrupt code if the
1371*0Sstevel@tonic-gate  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1372*0Sstevel@tonic-gate  * various ch_err_tl1_data structures for valid entries based on the bit
1373*0Sstevel@tonic-gate  * settings in the ch_err_tl1_flags entry of the structure.
1374*0Sstevel@tonic-gate  */
1375*0Sstevel@tonic-gate /*ARGSUSED*/
1376*0Sstevel@tonic-gate void
1377*0Sstevel@tonic-gate cpu_tl1_error(struct regs *rp, int panic)
1378*0Sstevel@tonic-gate {
1379*0Sstevel@tonic-gate 	ch_err_tl1_data_t *cl1p, cl1;
1380*0Sstevel@tonic-gate 	int i, ncl1ps;
1381*0Sstevel@tonic-gate 	uint64_t me_flags;
1382*0Sstevel@tonic-gate 	uint64_t ceen;
1383*0Sstevel@tonic-gate 
1384*0Sstevel@tonic-gate 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1385*0Sstevel@tonic-gate 		cl1p = &ch_err_tl1_data;
1386*0Sstevel@tonic-gate 		ncl1ps = 1;
1387*0Sstevel@tonic-gate 	} else if (CPU_PRIVATE(CPU) != NULL) {
1388*0Sstevel@tonic-gate 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1389*0Sstevel@tonic-gate 		ncl1ps = CH_ERR_TL1_TLMAX;
1390*0Sstevel@tonic-gate 	} else {
1391*0Sstevel@tonic-gate 		ncl1ps = 0;
1392*0Sstevel@tonic-gate 	}
1393*0Sstevel@tonic-gate 
1394*0Sstevel@tonic-gate 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1395*0Sstevel@tonic-gate 		if (cl1p->ch_err_tl1_flags == 0)
1396*0Sstevel@tonic-gate 			continue;
1397*0Sstevel@tonic-gate 
1398*0Sstevel@tonic-gate 		/*
1399*0Sstevel@tonic-gate 		 * Grab a copy of the logout data and invalidate
1400*0Sstevel@tonic-gate 		 * the logout area.
1401*0Sstevel@tonic-gate 		 */
1402*0Sstevel@tonic-gate 		cl1 = *cl1p;
1403*0Sstevel@tonic-gate 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1404*0Sstevel@tonic-gate 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1405*0Sstevel@tonic-gate 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1406*0Sstevel@tonic-gate 
1407*0Sstevel@tonic-gate 		/*
1408*0Sstevel@tonic-gate 		 * Log "first error" in ch_err_tl1_data.
1409*0Sstevel@tonic-gate 		 */
1410*0Sstevel@tonic-gate 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1411*0Sstevel@tonic-gate 			ceen = get_error_enable() & EN_REG_CEEN;
1412*0Sstevel@tonic-gate 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1413*0Sstevel@tonic-gate 			    1, ceen, &cl1.ch_err_tl1_logout);
1414*0Sstevel@tonic-gate 		}
1415*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
1416*0Sstevel@tonic-gate 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1417*0Sstevel@tonic-gate 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1418*0Sstevel@tonic-gate 			    (caddr_t)cl1.ch_err_tl1_tpc);
1419*0Sstevel@tonic-gate 		}
1420*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
1421*0Sstevel@tonic-gate 
1422*0Sstevel@tonic-gate 		/*
1423*0Sstevel@tonic-gate 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1424*0Sstevel@tonic-gate 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1425*0Sstevel@tonic-gate 		 * if the structure is busy, we just do the cache flushing
1426*0Sstevel@tonic-gate 		 * we have to do and then do the retry.  So the AFSR/AFAR
1427*0Sstevel@tonic-gate 		 * at this point *should* have some relevant info.  If there
1428*0Sstevel@tonic-gate 		 * are no valid errors in the AFSR, we'll assume they've
1429*0Sstevel@tonic-gate 		 * already been picked up and logged.  For I$/D$ parity,
1430*0Sstevel@tonic-gate 		 * we just log an event with an "Unknown" (NULL) TPC.
1431*0Sstevel@tonic-gate 		 */
1432*0Sstevel@tonic-gate 		if (me_flags & CH_ERR_FECC) {
1433*0Sstevel@tonic-gate 			ch_cpu_errors_t cpu_error_regs;
1434*0Sstevel@tonic-gate 			uint64_t t_afsr_errs;
1435*0Sstevel@tonic-gate 
1436*0Sstevel@tonic-gate 			/*
1437*0Sstevel@tonic-gate 			 * Get the error registers and see if there's
1438*0Sstevel@tonic-gate 			 * a pending error.  If not, don't bother
1439*0Sstevel@tonic-gate 			 * generating an "Invalid AFSR" error event.
1440*0Sstevel@tonic-gate 			 */
1441*0Sstevel@tonic-gate 			get_cpu_error_state(&cpu_error_regs);
1442*0Sstevel@tonic-gate 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1443*0Sstevel@tonic-gate 			    C_AFSR_EXT_ALL_ERRS) |
1444*0Sstevel@tonic-gate 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1445*0Sstevel@tonic-gate 			if (t_afsr_errs != 0) {
1446*0Sstevel@tonic-gate 				ceen = get_error_enable() & EN_REG_CEEN;
1447*0Sstevel@tonic-gate 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1448*0Sstevel@tonic-gate 				    1, ceen, NULL);
1449*0Sstevel@tonic-gate 			}
1450*0Sstevel@tonic-gate 		}
1451*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
1452*0Sstevel@tonic-gate 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1453*0Sstevel@tonic-gate 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1454*0Sstevel@tonic-gate 		}
1455*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
1456*0Sstevel@tonic-gate 	}
1457*0Sstevel@tonic-gate }
1458*0Sstevel@tonic-gate 
1459*0Sstevel@tonic-gate /*
1460*0Sstevel@tonic-gate  * Called from Fast ECC TL>0 handler in case of fatal error.
1461*0Sstevel@tonic-gate  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1462*0Sstevel@tonic-gate  * but if we don't, we'll panic with something reasonable.
1463*0Sstevel@tonic-gate  */
1464*0Sstevel@tonic-gate /*ARGSUSED*/
1465*0Sstevel@tonic-gate void
1466*0Sstevel@tonic-gate cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1467*0Sstevel@tonic-gate {
1468*0Sstevel@tonic-gate 	cpu_tl1_error(rp, 1);
1469*0Sstevel@tonic-gate 	/*
1470*0Sstevel@tonic-gate 	 * Should never return, but just in case.
1471*0Sstevel@tonic-gate 	 */
1472*0Sstevel@tonic-gate 	fm_panic("Unsurvivable ECC Error at TL>0");
1473*0Sstevel@tonic-gate }
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate /*
1476*0Sstevel@tonic-gate  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1477*0Sstevel@tonic-gate  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1478*0Sstevel@tonic-gate  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1479*0Sstevel@tonic-gate  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1480*0Sstevel@tonic-gate  *
1481*0Sstevel@tonic-gate  * Cheetah+ also handles (No additional processing required):
1482*0Sstevel@tonic-gate  *    DUE, DTO, DBERR	(NCEEN controlled)
1483*0Sstevel@tonic-gate  *    THCE		(CEEN and ET_ECC_en controlled)
1484*0Sstevel@tonic-gate  *    TUE		(ET_ECC_en controlled)
1485*0Sstevel@tonic-gate  *
1486*0Sstevel@tonic-gate  * Panther further adds:
1487*0Sstevel@tonic-gate  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1488*0Sstevel@tonic-gate  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1489*0Sstevel@tonic-gate  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1490*0Sstevel@tonic-gate  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1491*0Sstevel@tonic-gate  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1492*0Sstevel@tonic-gate  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1493*0Sstevel@tonic-gate  *
1494*0Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
1495*0Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
1496*0Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
1497*0Sstevel@tonic-gate  */
1498*0Sstevel@tonic-gate /*ARGSUSED*/
1499*0Sstevel@tonic-gate void
1500*0Sstevel@tonic-gate cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1501*0Sstevel@tonic-gate {
1502*0Sstevel@tonic-gate 	struct async_flt *aflt;
1503*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
1504*0Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
1505*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
1506*0Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1507*0Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
1508*0Sstevel@tonic-gate 
1509*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1510*0Sstevel@tonic-gate 	/*
1511*0Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
1512*0Sstevel@tonic-gate 	 * pointer, then we will have to make due without any detailed
1513*0Sstevel@tonic-gate 	 * logout information.
1514*0Sstevel@tonic-gate 	 */
1515*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
1516*0Sstevel@tonic-gate 		clop = NULL;
1517*0Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1518*0Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
1519*0Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
1520*0Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
1521*0Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
1522*0Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
1523*0Sstevel@tonic-gate #if defined(SERRANO)
1524*0Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
1525*0Sstevel@tonic-gate #endif	/* SERRANO */
1526*0Sstevel@tonic-gate 	} else {
1527*0Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1528*0Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
1529*0Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
1530*0Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1531*0Sstevel@tonic-gate #if defined(SERRANO)
1532*0Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1533*0Sstevel@tonic-gate #endif	/* SERRANO */
1534*0Sstevel@tonic-gate 	}
1535*0Sstevel@tonic-gate 
1536*0Sstevel@tonic-gate 	/*
1537*0Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
1538*0Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1539*0Sstevel@tonic-gate 	 * sticky bits.
1540*0Sstevel@tonic-gate 	 */
1541*0Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1542*0Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 	pr_reason[0] = '\0';
1545*0Sstevel@tonic-gate 	/* Setup the async fault structure */
1546*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
1547*0Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
1548*0Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
1549*0Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
1550*0Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
1551*0Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
1552*0Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1553*0Sstevel@tonic-gate 	aflt->flt_tl = 0;
1554*0Sstevel@tonic-gate 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1555*0Sstevel@tonic-gate 
1556*0Sstevel@tonic-gate 	/*
1557*0Sstevel@tonic-gate 	 * If this trap is a result of one of the errors not masked
1558*0Sstevel@tonic-gate 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1559*0Sstevel@tonic-gate 	 * indicate that a timeout is to be set later.
1560*0Sstevel@tonic-gate 	 */
1561*0Sstevel@tonic-gate 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1562*0Sstevel@tonic-gate 	    !aflt->flt_panic)
1563*0Sstevel@tonic-gate 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1564*0Sstevel@tonic-gate 	else
1565*0Sstevel@tonic-gate 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1566*0Sstevel@tonic-gate 
1567*0Sstevel@tonic-gate 	/*
1568*0Sstevel@tonic-gate 	 * log the CE and clean up
1569*0Sstevel@tonic-gate 	 */
1570*0Sstevel@tonic-gate 	cpu_log_and_clear_ce(&ch_flt);
1571*0Sstevel@tonic-gate 
1572*0Sstevel@tonic-gate 	/*
1573*0Sstevel@tonic-gate 	 * We re-enable CEEN (if required) and check if any disrupting errors
1574*0Sstevel@tonic-gate 	 * have happened.  We do this because if a disrupting error had occurred
1575*0Sstevel@tonic-gate 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1576*0Sstevel@tonic-gate 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1577*0Sstevel@tonic-gate 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1578*0Sstevel@tonic-gate 	 * of a error happening between checking the AFSR and enabling CEEN.
1579*0Sstevel@tonic-gate 	 */
1580*0Sstevel@tonic-gate 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1581*0Sstevel@tonic-gate 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1582*0Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
1583*0Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1584*0Sstevel@tonic-gate 		    NULL);
1585*0Sstevel@tonic-gate 	}
1586*0Sstevel@tonic-gate 
1587*0Sstevel@tonic-gate 	/*
1588*0Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1589*0Sstevel@tonic-gate 	 * be logged as part of the panic flow.
1590*0Sstevel@tonic-gate 	 */
1591*0Sstevel@tonic-gate 	if (aflt->flt_panic)
1592*0Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
1593*0Sstevel@tonic-gate }
1594*0Sstevel@tonic-gate 
1595*0Sstevel@tonic-gate /*
1596*0Sstevel@tonic-gate  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1597*0Sstevel@tonic-gate  * L3_EDU:BLD, TO, and BERR events.
1598*0Sstevel@tonic-gate  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1599*0Sstevel@tonic-gate  *
1600*0Sstevel@tonic-gate  * Cheetah+: No additional errors handled.
1601*0Sstevel@tonic-gate  *
1602*0Sstevel@tonic-gate  * Note that the p_clo_flags input is only valid in cases where the
1603*0Sstevel@tonic-gate  * cpu_private struct is not yet initialized (since that is the only
1604*0Sstevel@tonic-gate  * time that information cannot be obtained from the logout struct.)
1605*0Sstevel@tonic-gate  */
1606*0Sstevel@tonic-gate /*ARGSUSED*/
1607*0Sstevel@tonic-gate void
1608*0Sstevel@tonic-gate cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1609*0Sstevel@tonic-gate {
1610*0Sstevel@tonic-gate 	ushort_t ttype, tl;
1611*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
1612*0Sstevel@tonic-gate 	struct async_flt *aflt;
1613*0Sstevel@tonic-gate 	int trampolined = 0;
1614*0Sstevel@tonic-gate 	char pr_reason[MAX_REASON_STRING];
1615*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
1616*0Sstevel@tonic-gate 	uint64_t ceen, clo_flags;
1617*0Sstevel@tonic-gate 	uint64_t log_afsr;
1618*0Sstevel@tonic-gate 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1619*0Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
1620*0Sstevel@tonic-gate 	int expected = DDI_FM_ERR_UNEXPECTED;
1621*0Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
1622*0Sstevel@tonic-gate 
1623*0Sstevel@tonic-gate 	/*
1624*0Sstevel@tonic-gate 	 * We need to look at p_flag to determine if the thread detected an
1625*0Sstevel@tonic-gate 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1626*0Sstevel@tonic-gate 	 * because we just need a consistent snapshot and we know that everyone
1627*0Sstevel@tonic-gate 	 * else will store a consistent set of bits while holding p_lock.  We
1628*0Sstevel@tonic-gate 	 * don't have to worry about a race because SDOCORE is set once prior
1629*0Sstevel@tonic-gate 	 * to doing i/o from the process's address space and is never cleared.
1630*0Sstevel@tonic-gate 	 */
1631*0Sstevel@tonic-gate 	uint_t pflag = ttoproc(curthread)->p_flag;
1632*0Sstevel@tonic-gate 
1633*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1634*0Sstevel@tonic-gate 	/*
1635*0Sstevel@tonic-gate 	 * Get the CPU log out info. If we can't find our CPU private
1636*0Sstevel@tonic-gate 	 * pointer then we will have to make due without any detailed
1637*0Sstevel@tonic-gate 	 * logout information.
1638*0Sstevel@tonic-gate 	 */
1639*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
1640*0Sstevel@tonic-gate 		clop = NULL;
1641*0Sstevel@tonic-gate 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1642*0Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
1643*0Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
1644*0Sstevel@tonic-gate 		t_afar = cpu_error_regs.afar;
1645*0Sstevel@tonic-gate 		t_afsr = cpu_error_regs.afsr;
1646*0Sstevel@tonic-gate 		t_afsr_ext = cpu_error_regs.afsr_ext;
1647*0Sstevel@tonic-gate #if defined(SERRANO)
1648*0Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
1649*0Sstevel@tonic-gate #endif	/* SERRANO */
1650*0Sstevel@tonic-gate 		clo_flags = p_clo_flags;
1651*0Sstevel@tonic-gate 	} else {
1652*0Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1653*0Sstevel@tonic-gate 		t_afar = clop->clo_data.chd_afar;
1654*0Sstevel@tonic-gate 		t_afsr = clop->clo_data.chd_afsr;
1655*0Sstevel@tonic-gate 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1656*0Sstevel@tonic-gate #if defined(SERRANO)
1657*0Sstevel@tonic-gate 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1658*0Sstevel@tonic-gate #endif	/* SERRANO */
1659*0Sstevel@tonic-gate 		clo_flags = clop->clo_flags;
1660*0Sstevel@tonic-gate 	}
1661*0Sstevel@tonic-gate 
1662*0Sstevel@tonic-gate 	/*
1663*0Sstevel@tonic-gate 	 * In order to simplify code, we maintain this afsr_errs
1664*0Sstevel@tonic-gate 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1665*0Sstevel@tonic-gate 	 * sticky bits.
1666*0Sstevel@tonic-gate 	 */
1667*0Sstevel@tonic-gate 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1668*0Sstevel@tonic-gate 	    (t_afsr & C_AFSR_ALL_ERRS);
1669*0Sstevel@tonic-gate 	pr_reason[0] = '\0';
1670*0Sstevel@tonic-gate 
1671*0Sstevel@tonic-gate 	/*
1672*0Sstevel@tonic-gate 	 * Grab information encoded into our clo_flags field.
1673*0Sstevel@tonic-gate 	 */
1674*0Sstevel@tonic-gate 	ceen = clo_flags & EN_REG_CEEN;
1675*0Sstevel@tonic-gate 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1676*0Sstevel@tonic-gate 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1677*0Sstevel@tonic-gate 
1678*0Sstevel@tonic-gate 	/*
1679*0Sstevel@tonic-gate 	 * handle the specific error
1680*0Sstevel@tonic-gate 	 */
1681*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
1682*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
1683*0Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
1684*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
1685*0Sstevel@tonic-gate 	ch_flt.afsr_ext = t_afsr_ext;
1686*0Sstevel@tonic-gate 	ch_flt.afsr_errs = t_afsr_errs;
1687*0Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
1688*0Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
1689*0Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
1690*0Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
1691*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
1692*0Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1693*0Sstevel@tonic-gate 	aflt->flt_tl = (uchar_t)tl;
1694*0Sstevel@tonic-gate 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1695*0Sstevel@tonic-gate 	    C_AFSR_PANIC(t_afsr_errs));
1696*0Sstevel@tonic-gate 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1697*0Sstevel@tonic-gate 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1698*0Sstevel@tonic-gate 
1699*0Sstevel@tonic-gate 	/*
1700*0Sstevel@tonic-gate 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1701*0Sstevel@tonic-gate 	 * see if we were executing in the kernel under on_trap() or t_lofault
1702*0Sstevel@tonic-gate 	 * protection.  If so, modify the saved registers so that we return
1703*0Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine.
1704*0Sstevel@tonic-gate 	 */
1705*0Sstevel@tonic-gate 	if (aflt->flt_priv && tl == 0) {
1706*0Sstevel@tonic-gate 		if (curthread->t_ontrap != NULL) {
1707*0Sstevel@tonic-gate 			on_trap_data_t *otp = curthread->t_ontrap;
1708*0Sstevel@tonic-gate 
1709*0Sstevel@tonic-gate 			if (otp->ot_prot & OT_DATA_EC) {
1710*0Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_EC;
1711*0Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_EC;
1712*0Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
1713*0Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
1714*0Sstevel@tonic-gate 				trampolined = 1;
1715*0Sstevel@tonic-gate 			}
1716*0Sstevel@tonic-gate 
1717*0Sstevel@tonic-gate 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1718*0Sstevel@tonic-gate 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1719*0Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_ACCESS;
1720*0Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_ACCESS;
1721*0Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
1722*0Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
1723*0Sstevel@tonic-gate 				trampolined = 1;
1724*0Sstevel@tonic-gate 				/*
1725*0Sstevel@tonic-gate 				 * for peeks and caut_gets errors are expected
1726*0Sstevel@tonic-gate 				 */
1727*0Sstevel@tonic-gate 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1728*0Sstevel@tonic-gate 				if (!hp)
1729*0Sstevel@tonic-gate 					expected = DDI_FM_ERR_PEEK;
1730*0Sstevel@tonic-gate 				else if (hp->ah_acc.devacc_attr_access ==
1731*0Sstevel@tonic-gate 				    DDI_CAUTIOUS_ACC)
1732*0Sstevel@tonic-gate 					expected = DDI_FM_ERR_EXPECTED;
1733*0Sstevel@tonic-gate 			}
1734*0Sstevel@tonic-gate 
1735*0Sstevel@tonic-gate 		} else if (curthread->t_lofault) {
1736*0Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_COPY;
1737*0Sstevel@tonic-gate 			rp->r_g1 = EFAULT;
1738*0Sstevel@tonic-gate 			rp->r_pc = curthread->t_lofault;
1739*0Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
1740*0Sstevel@tonic-gate 			trampolined = 1;
1741*0Sstevel@tonic-gate 		}
1742*0Sstevel@tonic-gate 	}
1743*0Sstevel@tonic-gate 
1744*0Sstevel@tonic-gate 	/*
1745*0Sstevel@tonic-gate 	 * If we're in user mode or we're doing a protected copy, we either
1746*0Sstevel@tonic-gate 	 * want the ASTON code below to send a signal to the user process
1747*0Sstevel@tonic-gate 	 * or we want to panic if aft_panic is set.
1748*0Sstevel@tonic-gate 	 *
1749*0Sstevel@tonic-gate 	 * If we're in privileged mode and we're not doing a copy, then we
1750*0Sstevel@tonic-gate 	 * need to check if we've trampolined.  If we haven't trampolined,
1751*0Sstevel@tonic-gate 	 * we should panic.
1752*0Sstevel@tonic-gate 	 */
1753*0Sstevel@tonic-gate 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1754*0Sstevel@tonic-gate 		if (t_afsr_errs &
1755*0Sstevel@tonic-gate 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1756*0Sstevel@tonic-gate 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1757*0Sstevel@tonic-gate 			aflt->flt_panic |= aft_panic;
1758*0Sstevel@tonic-gate 	} else if (!trampolined) {
1759*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1760*0Sstevel@tonic-gate 	}
1761*0Sstevel@tonic-gate 
1762*0Sstevel@tonic-gate 	/*
1763*0Sstevel@tonic-gate 	 * If we've trampolined due to a privileged TO or BERR, or if an
1764*0Sstevel@tonic-gate 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1765*0Sstevel@tonic-gate 	 * event for that TO or BERR.  Queue all other events (if any) besides
1766*0Sstevel@tonic-gate 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1767*0Sstevel@tonic-gate 	 * ignore the number of events queued.  If we haven't trampolined due
1768*0Sstevel@tonic-gate 	 * to a TO or BERR, just enqueue events normally.
1769*0Sstevel@tonic-gate 	 */
1770*0Sstevel@tonic-gate 	log_afsr = t_afsr_errs;
1771*0Sstevel@tonic-gate 	if (trampolined) {
1772*0Sstevel@tonic-gate 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1773*0Sstevel@tonic-gate 	} else if (!aflt->flt_priv) {
1774*0Sstevel@tonic-gate 		/*
1775*0Sstevel@tonic-gate 		 * User mode, suppress messages if
1776*0Sstevel@tonic-gate 		 * cpu_berr_to_verbose is not set.
1777*0Sstevel@tonic-gate 		 */
1778*0Sstevel@tonic-gate 		if (!cpu_berr_to_verbose)
1779*0Sstevel@tonic-gate 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1780*0Sstevel@tonic-gate 	}
1781*0Sstevel@tonic-gate 
1782*0Sstevel@tonic-gate 	/*
1783*0Sstevel@tonic-gate 	 * Log any errors that occurred
1784*0Sstevel@tonic-gate 	 */
1785*0Sstevel@tonic-gate 	if (((log_afsr &
1786*0Sstevel@tonic-gate 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1787*0Sstevel@tonic-gate 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1788*0Sstevel@tonic-gate 		(t_afsr_errs &
1789*0Sstevel@tonic-gate 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1790*0Sstevel@tonic-gate 		ch_flt.flt_type = CPU_INV_AFSR;
1791*0Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1792*0Sstevel@tonic-gate 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1793*0Sstevel@tonic-gate 		    aflt->flt_panic);
1794*0Sstevel@tonic-gate 	}
1795*0Sstevel@tonic-gate 
1796*0Sstevel@tonic-gate 	/*
1797*0Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
1798*0Sstevel@tonic-gate 	 */
1799*0Sstevel@tonic-gate 	if (clop) {
1800*0Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
1801*0Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1802*0Sstevel@tonic-gate 	}
1803*0Sstevel@tonic-gate 
1804*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
1805*0Sstevel@tonic-gate 	/*
1806*0Sstevel@tonic-gate 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1807*0Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
1808*0Sstevel@tonic-gate 	 */
1809*0Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1810*0Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
1811*0Sstevel@tonic-gate 	}
1812*0Sstevel@tonic-gate 
1813*0Sstevel@tonic-gate 	/*
1814*0Sstevel@tonic-gate 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1815*0Sstevel@tonic-gate 	 * line from the Ecache.  We also need to query the bus nexus for
1816*0Sstevel@tonic-gate 	 * fatal errors.  Attempts to do diagnostic read on caches may
1817*0Sstevel@tonic-gate 	 * introduce more errors (especially when the module is bad).
1818*0Sstevel@tonic-gate 	 */
1819*0Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1820*0Sstevel@tonic-gate 		/*
1821*0Sstevel@tonic-gate 		 * Ask our bus nexus friends if they have any fatal errors.  If
1822*0Sstevel@tonic-gate 		 * so, they will log appropriate error messages.
1823*0Sstevel@tonic-gate 		 */
1824*0Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1825*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1826*0Sstevel@tonic-gate 
1827*0Sstevel@tonic-gate 		/*
1828*0Sstevel@tonic-gate 		 * We got a UE or RUE and are panicking, save the fault PA in
1829*0Sstevel@tonic-gate 		 * a known location so that the platform specific panic code
1830*0Sstevel@tonic-gate 		 * can check for copyback errors.
1831*0Sstevel@tonic-gate 		 */
1832*0Sstevel@tonic-gate 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1833*0Sstevel@tonic-gate 			panic_aflt = *aflt;
1834*0Sstevel@tonic-gate 		}
1835*0Sstevel@tonic-gate 	}
1836*0Sstevel@tonic-gate 
1837*0Sstevel@tonic-gate 	/*
1838*0Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache
1839*0Sstevel@tonic-gate 	 */
1840*0Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1841*0Sstevel@tonic-gate 		cpu_error_ecache_flush(&ch_flt);
1842*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
1843*0Sstevel@tonic-gate 	/*
1844*0Sstevel@tonic-gate 	 * UE/BERR/TO: Call our bus nexus friends to check for
1845*0Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
1846*0Sstevel@tonic-gate 	 */
1847*0Sstevel@tonic-gate 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1848*0Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
1849*0Sstevel@tonic-gate 	}
1850*0Sstevel@tonic-gate 
1851*0Sstevel@tonic-gate 	/*
1852*0Sstevel@tonic-gate 	 * UE: If the UE is in memory, we need to flush the bad
1853*0Sstevel@tonic-gate 	 * line from the Ecache.  We also need to query the bus nexus for
1854*0Sstevel@tonic-gate 	 * fatal errors.  Attempts to do diagnostic read on caches may
1855*0Sstevel@tonic-gate 	 * introduce more errors (especially when the module is bad).
1856*0Sstevel@tonic-gate 	 */
1857*0Sstevel@tonic-gate 	if (t_afsr & C_AFSR_UE) {
1858*0Sstevel@tonic-gate 		/*
1859*0Sstevel@tonic-gate 		 * Ask our legacy bus nexus friends if they have any fatal
1860*0Sstevel@tonic-gate 		 * errors.  If so, they will log appropriate error messages.
1861*0Sstevel@tonic-gate 		 */
1862*0Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1863*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1864*0Sstevel@tonic-gate 
1865*0Sstevel@tonic-gate 		/*
1866*0Sstevel@tonic-gate 		 * We got a UE and are panicking, save the fault PA in a known
1867*0Sstevel@tonic-gate 		 * location so that the platform specific panic code can check
1868*0Sstevel@tonic-gate 		 * for copyback errors.
1869*0Sstevel@tonic-gate 		 */
1870*0Sstevel@tonic-gate 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1871*0Sstevel@tonic-gate 			panic_aflt = *aflt;
1872*0Sstevel@tonic-gate 		}
1873*0Sstevel@tonic-gate 	}
1874*0Sstevel@tonic-gate 
1875*0Sstevel@tonic-gate 	/*
1876*0Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache
1877*0Sstevel@tonic-gate 	 */
1878*0Sstevel@tonic-gate 	if (t_afsr_errs &
1879*0Sstevel@tonic-gate 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1880*0Sstevel@tonic-gate 		cpu_error_ecache_flush(&ch_flt);
1881*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
1882*0Sstevel@tonic-gate 
1883*0Sstevel@tonic-gate 	/*
1884*0Sstevel@tonic-gate 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1885*0Sstevel@tonic-gate 	 * or disrupting errors have happened.  We do this because if a
1886*0Sstevel@tonic-gate 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1887*0Sstevel@tonic-gate 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1888*0Sstevel@tonic-gate 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1889*0Sstevel@tonic-gate 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1890*0Sstevel@tonic-gate 	 * deferred or disrupting error happening between checking the AFSR and
1891*0Sstevel@tonic-gate 	 * enabling NCEEN/CEEN.
1892*0Sstevel@tonic-gate 	 *
1893*0Sstevel@tonic-gate 	 * Note: CEEN reenabled only if it was on when trap taken.
1894*0Sstevel@tonic-gate 	 */
1895*0Sstevel@tonic-gate 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1896*0Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
1897*0Sstevel@tonic-gate 		/*
1898*0Sstevel@tonic-gate 		 * Check for secondary errors, and avoid panicking if we
1899*0Sstevel@tonic-gate 		 * have them
1900*0Sstevel@tonic-gate 		 */
1901*0Sstevel@tonic-gate 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1902*0Sstevel@tonic-gate 		    t_afar) == 0) {
1903*0Sstevel@tonic-gate 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1904*0Sstevel@tonic-gate 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1905*0Sstevel@tonic-gate 		}
1906*0Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1907*0Sstevel@tonic-gate 		    NULL);
1908*0Sstevel@tonic-gate 	}
1909*0Sstevel@tonic-gate 
1910*0Sstevel@tonic-gate 	/*
1911*0Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1912*0Sstevel@tonic-gate 	 * be logged as part of the panic flow.
1913*0Sstevel@tonic-gate 	 */
1914*0Sstevel@tonic-gate 	if (aflt->flt_panic)
1915*0Sstevel@tonic-gate 		fm_panic("%sError(s)", pr_reason);
1916*0Sstevel@tonic-gate 
1917*0Sstevel@tonic-gate 	/*
1918*0Sstevel@tonic-gate 	 * If we queued an error and we are going to return from the trap and
1919*0Sstevel@tonic-gate 	 * the error was in user mode or inside of a copy routine, set AST flag
1920*0Sstevel@tonic-gate 	 * so the queue will be drained before returning to user mode.  The
1921*0Sstevel@tonic-gate 	 * AST processing will also act on our failure policy.
1922*0Sstevel@tonic-gate 	 */
1923*0Sstevel@tonic-gate 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1924*0Sstevel@tonic-gate 		int pcb_flag = 0;
1925*0Sstevel@tonic-gate 
1926*0Sstevel@tonic-gate 		if (t_afsr_errs &
1927*0Sstevel@tonic-gate 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
1928*0Sstevel@tonic-gate 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1929*0Sstevel@tonic-gate 			pcb_flag |= ASYNC_HWERR;
1930*0Sstevel@tonic-gate 
1931*0Sstevel@tonic-gate 		if (t_afsr & C_AFSR_BERR)
1932*0Sstevel@tonic-gate 			pcb_flag |= ASYNC_BERR;
1933*0Sstevel@tonic-gate 
1934*0Sstevel@tonic-gate 		if (t_afsr & C_AFSR_TO)
1935*0Sstevel@tonic-gate 			pcb_flag |= ASYNC_BTO;
1936*0Sstevel@tonic-gate 
1937*0Sstevel@tonic-gate 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1938*0Sstevel@tonic-gate 		aston(curthread);
1939*0Sstevel@tonic-gate 	}
1940*0Sstevel@tonic-gate }
1941*0Sstevel@tonic-gate 
1942*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
1943*0Sstevel@tonic-gate /*
1944*0Sstevel@tonic-gate  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1945*0Sstevel@tonic-gate  *
1946*0Sstevel@tonic-gate  * For Panther, P$ data parity errors during floating point load hits
1947*0Sstevel@tonic-gate  * are also detected (reported as TT 0x71) and handled by this trap
1948*0Sstevel@tonic-gate  * handler.
1949*0Sstevel@tonic-gate  *
1950*0Sstevel@tonic-gate  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1951*0Sstevel@tonic-gate  * is available.
1952*0Sstevel@tonic-gate  */
1953*0Sstevel@tonic-gate /*ARGSUSED*/
1954*0Sstevel@tonic-gate void
1955*0Sstevel@tonic-gate cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1956*0Sstevel@tonic-gate {
1957*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
1958*0Sstevel@tonic-gate 	struct async_flt *aflt;
1959*0Sstevel@tonic-gate 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1960*0Sstevel@tonic-gate 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1961*0Sstevel@tonic-gate 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1962*0Sstevel@tonic-gate 	char *error_class;
1963*0Sstevel@tonic-gate 
1964*0Sstevel@tonic-gate 	/*
1965*0Sstevel@tonic-gate 	 * Log the error.
1966*0Sstevel@tonic-gate 	 * For icache parity errors the fault address is the trap PC.
1967*0Sstevel@tonic-gate 	 * For dcache/pcache parity errors the instruction would have to
1968*0Sstevel@tonic-gate 	 * be decoded to determine the address and that isn't possible
1969*0Sstevel@tonic-gate 	 * at high PIL.
1970*0Sstevel@tonic-gate 	 */
1971*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1972*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
1973*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
1974*0Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
1975*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
1976*0Sstevel@tonic-gate 	aflt->flt_pc = tpc;
1977*0Sstevel@tonic-gate 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1978*0Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
1979*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
1980*0Sstevel@tonic-gate 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1981*0Sstevel@tonic-gate 	aflt->flt_tl = tl;
1982*0Sstevel@tonic-gate 	aflt->flt_panic = panic;
1983*0Sstevel@tonic-gate 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1984*0Sstevel@tonic-gate 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1985*0Sstevel@tonic-gate 
1986*0Sstevel@tonic-gate 	if (iparity) {
1987*0Sstevel@tonic-gate 		cpu_icache_parity_info(&ch_flt);
1988*0Sstevel@tonic-gate 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1989*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1990*0Sstevel@tonic-gate 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1991*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1992*0Sstevel@tonic-gate 		else
1993*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_IPE;
1994*0Sstevel@tonic-gate 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
1995*0Sstevel@tonic-gate 	} else {
1996*0Sstevel@tonic-gate 		cpu_dcache_parity_info(&ch_flt);
1997*0Sstevel@tonic-gate 		if (ch_flt.parity_data.dpe.cpl_off != -1)
1998*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
1999*0Sstevel@tonic-gate 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2000*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2001*0Sstevel@tonic-gate 		else
2002*0Sstevel@tonic-gate 			error_class = FM_EREPORT_CPU_USIII_DPE;
2003*0Sstevel@tonic-gate 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2004*0Sstevel@tonic-gate 		/*
2005*0Sstevel@tonic-gate 		 * For panther we also need to check the P$ for parity errors.
2006*0Sstevel@tonic-gate 		 */
2007*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2008*0Sstevel@tonic-gate 			cpu_pcache_parity_info(&ch_flt);
2009*0Sstevel@tonic-gate 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2010*0Sstevel@tonic-gate 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2011*0Sstevel@tonic-gate 				aflt->flt_payload =
2012*0Sstevel@tonic-gate 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2013*0Sstevel@tonic-gate 			}
2014*0Sstevel@tonic-gate 		}
2015*0Sstevel@tonic-gate 	}
2016*0Sstevel@tonic-gate 
2017*0Sstevel@tonic-gate 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2018*0Sstevel@tonic-gate 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2019*0Sstevel@tonic-gate 
2020*0Sstevel@tonic-gate 	if (iparity) {
2021*0Sstevel@tonic-gate 		/*
2022*0Sstevel@tonic-gate 		 * Invalidate entire I$.
2023*0Sstevel@tonic-gate 		 * This is required due to the use of diagnostic ASI
2024*0Sstevel@tonic-gate 		 * accesses that may result in a loss of I$ coherency.
2025*0Sstevel@tonic-gate 		 */
2026*0Sstevel@tonic-gate 		if (cache_boot_state & DCU_IC) {
2027*0Sstevel@tonic-gate 			flush_icache();
2028*0Sstevel@tonic-gate 		}
2029*0Sstevel@tonic-gate 		/*
2030*0Sstevel@tonic-gate 		 * According to section P.3.1 of the Panther PRM, we
2031*0Sstevel@tonic-gate 		 * need to do a little more for recovery on those
2032*0Sstevel@tonic-gate 		 * CPUs after encountering an I$ parity error.
2033*0Sstevel@tonic-gate 		 */
2034*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2035*0Sstevel@tonic-gate 			flush_ipb();
2036*0Sstevel@tonic-gate 			correct_dcache_parity(dcache_size,
2037*0Sstevel@tonic-gate 			    dcache_linesize);
2038*0Sstevel@tonic-gate 			flush_pcache();
2039*0Sstevel@tonic-gate 		}
2040*0Sstevel@tonic-gate 	} else {
2041*0Sstevel@tonic-gate 		/*
2042*0Sstevel@tonic-gate 		 * Since the valid bit is ignored when checking parity the
2043*0Sstevel@tonic-gate 		 * D$ data and tag must also be corrected.  Set D$ data bits
2044*0Sstevel@tonic-gate 		 * to zero and set utag to 0, 1, 2, 3.
2045*0Sstevel@tonic-gate 		 */
2046*0Sstevel@tonic-gate 		correct_dcache_parity(dcache_size, dcache_linesize);
2047*0Sstevel@tonic-gate 
2048*0Sstevel@tonic-gate 		/*
2049*0Sstevel@tonic-gate 		 * According to section P.3.3 of the Panther PRM, we
2050*0Sstevel@tonic-gate 		 * need to do a little more for recovery on those
2051*0Sstevel@tonic-gate 		 * CPUs after encountering a D$ or P$ parity error.
2052*0Sstevel@tonic-gate 		 *
2053*0Sstevel@tonic-gate 		 * As far as clearing P$ parity errors, it is enough to
2054*0Sstevel@tonic-gate 		 * simply invalidate all entries in the P$ since P$ parity
2055*0Sstevel@tonic-gate 		 * error traps are only generated for floating point load
2056*0Sstevel@tonic-gate 		 * hits.
2057*0Sstevel@tonic-gate 		 */
2058*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2059*0Sstevel@tonic-gate 			flush_icache();
2060*0Sstevel@tonic-gate 			flush_ipb();
2061*0Sstevel@tonic-gate 			flush_pcache();
2062*0Sstevel@tonic-gate 		}
2063*0Sstevel@tonic-gate 	}
2064*0Sstevel@tonic-gate 
2065*0Sstevel@tonic-gate 	/*
2066*0Sstevel@tonic-gate 	 * Invalidate entire D$ if it was enabled.
2067*0Sstevel@tonic-gate 	 * This is done to avoid stale data in the D$ which might
2068*0Sstevel@tonic-gate 	 * occur with the D$ disabled and the trap handler doing
2069*0Sstevel@tonic-gate 	 * stores affecting lines already in the D$.
2070*0Sstevel@tonic-gate 	 */
2071*0Sstevel@tonic-gate 	if (cache_boot_state & DCU_DC) {
2072*0Sstevel@tonic-gate 		flush_dcache();
2073*0Sstevel@tonic-gate 	}
2074*0Sstevel@tonic-gate 
2075*0Sstevel@tonic-gate 	/*
2076*0Sstevel@tonic-gate 	 * Restore caches to their bootup state.
2077*0Sstevel@tonic-gate 	 */
2078*0Sstevel@tonic-gate 	set_dcu(get_dcu() | cache_boot_state);
2079*0Sstevel@tonic-gate 
2080*0Sstevel@tonic-gate 	/*
2081*0Sstevel@tonic-gate 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2082*0Sstevel@tonic-gate 	 * be logged as part of the panic flow.
2083*0Sstevel@tonic-gate 	 */
2084*0Sstevel@tonic-gate 	if (aflt->flt_panic)
2085*0Sstevel@tonic-gate 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2086*0Sstevel@tonic-gate 
2087*0Sstevel@tonic-gate 	/*
2088*0Sstevel@tonic-gate 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2089*0Sstevel@tonic-gate 	 * the chance of getting an unrecoverable Fast ECC error.  This
2090*0Sstevel@tonic-gate 	 * flush will evict the part of the parity trap handler that is run
2091*0Sstevel@tonic-gate 	 * at TL>1.
2092*0Sstevel@tonic-gate 	 */
2093*0Sstevel@tonic-gate 	if (tl) {
2094*0Sstevel@tonic-gate 		cpu_flush_ecache();
2095*0Sstevel@tonic-gate 	}
2096*0Sstevel@tonic-gate }
2097*0Sstevel@tonic-gate 
2098*0Sstevel@tonic-gate /*
2099*0Sstevel@tonic-gate  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2100*0Sstevel@tonic-gate  * to indicate which portions of the captured data should be in the ereport.
2101*0Sstevel@tonic-gate  */
2102*0Sstevel@tonic-gate void
2103*0Sstevel@tonic-gate cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2104*0Sstevel@tonic-gate {
2105*0Sstevel@tonic-gate 	int way = ch_flt->parity_data.ipe.cpl_way;
2106*0Sstevel@tonic-gate 	int offset = ch_flt->parity_data.ipe.cpl_off;
2107*0Sstevel@tonic-gate 	int tag_index;
2108*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2109*0Sstevel@tonic-gate 
2110*0Sstevel@tonic-gate 
2111*0Sstevel@tonic-gate 	if ((offset != -1) || (way != -1)) {
2112*0Sstevel@tonic-gate 		/*
2113*0Sstevel@tonic-gate 		 * Parity error in I$ tag or data
2114*0Sstevel@tonic-gate 		 */
2115*0Sstevel@tonic-gate 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2116*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2117*0Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2118*0Sstevel@tonic-gate 			    PN_ICIDX_TO_WAY(tag_index);
2119*0Sstevel@tonic-gate 		else
2120*0Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2121*0Sstevel@tonic-gate 			    CH_ICIDX_TO_WAY(tag_index);
2122*0Sstevel@tonic-gate 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2123*0Sstevel@tonic-gate 		    IC_LOGFLAG_MAGIC;
2124*0Sstevel@tonic-gate 	} else {
2125*0Sstevel@tonic-gate 		/*
2126*0Sstevel@tonic-gate 		 * Parity error was not identified.
2127*0Sstevel@tonic-gate 		 * Log tags and data for all ways.
2128*0Sstevel@tonic-gate 		 */
2129*0Sstevel@tonic-gate 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2130*0Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2131*0Sstevel@tonic-gate 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2132*0Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2133*0Sstevel@tonic-gate 				    PN_ICIDX_TO_WAY(tag_index);
2134*0Sstevel@tonic-gate 			else
2135*0Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2136*0Sstevel@tonic-gate 				    CH_ICIDX_TO_WAY(tag_index);
2137*0Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2138*0Sstevel@tonic-gate 			    IC_LOGFLAG_MAGIC;
2139*0Sstevel@tonic-gate 		}
2140*0Sstevel@tonic-gate 	}
2141*0Sstevel@tonic-gate }
2142*0Sstevel@tonic-gate 
2143*0Sstevel@tonic-gate /*
2144*0Sstevel@tonic-gate  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2145*0Sstevel@tonic-gate  * to indicate which portions of the captured data should be in the ereport.
2146*0Sstevel@tonic-gate  */
2147*0Sstevel@tonic-gate void
2148*0Sstevel@tonic-gate cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2149*0Sstevel@tonic-gate {
2150*0Sstevel@tonic-gate 	int way = ch_flt->parity_data.dpe.cpl_way;
2151*0Sstevel@tonic-gate 	int offset = ch_flt->parity_data.dpe.cpl_off;
2152*0Sstevel@tonic-gate 	int tag_index;
2153*0Sstevel@tonic-gate 
2154*0Sstevel@tonic-gate 	if (offset != -1) {
2155*0Sstevel@tonic-gate 		/*
2156*0Sstevel@tonic-gate 		 * Parity error in D$ or P$ data array.
2157*0Sstevel@tonic-gate 		 *
2158*0Sstevel@tonic-gate 		 * First check to see whether the parity error is in D$ or P$
2159*0Sstevel@tonic-gate 		 * since P$ data parity errors are reported in Panther using
2160*0Sstevel@tonic-gate 		 * the same trap.
2161*0Sstevel@tonic-gate 		 */
2162*0Sstevel@tonic-gate 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2163*0Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2164*0Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2165*0Sstevel@tonic-gate 			    CH_PCIDX_TO_WAY(tag_index);
2166*0Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2167*0Sstevel@tonic-gate 			    PC_LOGFLAG_MAGIC;
2168*0Sstevel@tonic-gate 		} else {
2169*0Sstevel@tonic-gate 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2170*0Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2171*0Sstevel@tonic-gate 			    CH_DCIDX_TO_WAY(tag_index);
2172*0Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2173*0Sstevel@tonic-gate 			    DC_LOGFLAG_MAGIC;
2174*0Sstevel@tonic-gate 		}
2175*0Sstevel@tonic-gate 	} else if (way != -1) {
2176*0Sstevel@tonic-gate 		/*
2177*0Sstevel@tonic-gate 		 * Parity error in D$ tag.
2178*0Sstevel@tonic-gate 		 */
2179*0Sstevel@tonic-gate 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2180*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2181*0Sstevel@tonic-gate 		    CH_DCIDX_TO_WAY(tag_index);
2182*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2183*0Sstevel@tonic-gate 		    DC_LOGFLAG_MAGIC;
2184*0Sstevel@tonic-gate 	}
2185*0Sstevel@tonic-gate }
2186*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
2187*0Sstevel@tonic-gate 
2188*0Sstevel@tonic-gate /*
2189*0Sstevel@tonic-gate  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2190*0Sstevel@tonic-gate  * post-process CPU events that are dequeued.  As such, it can be invoked
2191*0Sstevel@tonic-gate  * from softint context, from AST processing in the trap() flow, or from the
2192*0Sstevel@tonic-gate  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2193*0Sstevel@tonic-gate  * Historically this entry point was used to log the actual cmn_err(9F) text;
2194*0Sstevel@tonic-gate  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2195*0Sstevel@tonic-gate  * With FMA this function now also returns a flag which indicates to the
2196*0Sstevel@tonic-gate  * caller whether the ereport should be posted (1) or suppressed (0).
2197*0Sstevel@tonic-gate  */
2198*0Sstevel@tonic-gate static int
2199*0Sstevel@tonic-gate cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2200*0Sstevel@tonic-gate {
2201*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2202*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)flt;
2203*0Sstevel@tonic-gate 	page_t *pp;
2204*0Sstevel@tonic-gate 
2205*0Sstevel@tonic-gate 	switch (ch_flt->flt_type) {
2206*0Sstevel@tonic-gate 	case CPU_INV_AFSR:
2207*0Sstevel@tonic-gate 		/*
2208*0Sstevel@tonic-gate 		 * If it is a disrupting trap and the AFSR is zero, then
2209*0Sstevel@tonic-gate 		 * the event has probably already been noted. Do not post
2210*0Sstevel@tonic-gate 		 * an ereport.
2211*0Sstevel@tonic-gate 		 */
2212*0Sstevel@tonic-gate 		if ((aflt->flt_status & ECC_C_TRAP) &&
2213*0Sstevel@tonic-gate 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2214*0Sstevel@tonic-gate 			return (0);
2215*0Sstevel@tonic-gate 		else
2216*0Sstevel@tonic-gate 			return (1);
2217*0Sstevel@tonic-gate 	case CPU_TO:
2218*0Sstevel@tonic-gate 	case CPU_BERR:
2219*0Sstevel@tonic-gate 	case CPU_FATAL:
2220*0Sstevel@tonic-gate 	case CPU_FPUERR:
2221*0Sstevel@tonic-gate 		return (1);
2222*0Sstevel@tonic-gate 
2223*0Sstevel@tonic-gate 	case CPU_UE_ECACHE_RETIRE:
2224*0Sstevel@tonic-gate 		cpu_log_err(aflt);
2225*0Sstevel@tonic-gate 		cpu_page_retire(ch_flt);
2226*0Sstevel@tonic-gate 		return (1);
2227*0Sstevel@tonic-gate 
2228*0Sstevel@tonic-gate 	/*
2229*0Sstevel@tonic-gate 	 * Cases where we may want to suppress logging or perform
2230*0Sstevel@tonic-gate 	 * extended diagnostics.
2231*0Sstevel@tonic-gate 	 */
2232*0Sstevel@tonic-gate 	case CPU_CE:
2233*0Sstevel@tonic-gate 	case CPU_EMC:
2234*0Sstevel@tonic-gate 		pp = page_numtopp_nolock((pfn_t)
2235*0Sstevel@tonic-gate 		    (aflt->flt_addr >> MMU_PAGESHIFT));
2236*0Sstevel@tonic-gate 
2237*0Sstevel@tonic-gate 		/*
2238*0Sstevel@tonic-gate 		 * We want to skip logging and further classification
2239*0Sstevel@tonic-gate 		 * only if ALL the following conditions are true:
2240*0Sstevel@tonic-gate 		 *
2241*0Sstevel@tonic-gate 		 *	1. There is only one error
2242*0Sstevel@tonic-gate 		 *	2. That error is a correctable memory error
2243*0Sstevel@tonic-gate 		 *	3. The error is caused by the memory scrubber (in
2244*0Sstevel@tonic-gate 		 *	   which case the error will have occurred under
2245*0Sstevel@tonic-gate 		 *	   on_trap protection)
2246*0Sstevel@tonic-gate 		 *	4. The error is on a retired page
2247*0Sstevel@tonic-gate 		 *
2248*0Sstevel@tonic-gate 		 * Note: AFLT_PROT_EC is used places other than the memory
2249*0Sstevel@tonic-gate 		 * scrubber.  However, none of those errors should occur
2250*0Sstevel@tonic-gate 		 * on a retired page.
2251*0Sstevel@tonic-gate 		 */
2252*0Sstevel@tonic-gate 		if ((ch_flt->afsr_errs &
2253*0Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2254*0Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
2255*0Sstevel@tonic-gate 
2256*0Sstevel@tonic-gate 			if (pp != NULL && page_isretired(pp)) {
2257*0Sstevel@tonic-gate 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2258*0Sstevel@tonic-gate 
2259*0Sstevel@tonic-gate 				/*
2260*0Sstevel@tonic-gate 				 * Since we're skipping logging, we'll need
2261*0Sstevel@tonic-gate 				 * to schedule the re-enabling of CEEN
2262*0Sstevel@tonic-gate 				 */
2263*0Sstevel@tonic-gate 				(void) timeout(cpu_delayed_check_ce_errors,
2264*0Sstevel@tonic-gate 				    (void *)aflt->flt_inst, drv_usectohz(
2265*0Sstevel@tonic-gate 				    (clock_t)cpu_ceen_delay_secs * MICROSEC));
2266*0Sstevel@tonic-gate 			    }
2267*0Sstevel@tonic-gate 			    return (0);
2268*0Sstevel@tonic-gate 			}
2269*0Sstevel@tonic-gate 		}
2270*0Sstevel@tonic-gate 
2271*0Sstevel@tonic-gate 		/*
2272*0Sstevel@tonic-gate 		 * Perform/schedule further classification actions, but
2273*0Sstevel@tonic-gate 		 * only if the page is healthy (we don't want bad
2274*0Sstevel@tonic-gate 		 * pages inducing too much diagnostic activity).  If we could
2275*0Sstevel@tonic-gate 		 * not find a page pointer then we also skip this.  If
2276*0Sstevel@tonic-gate 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2277*0Sstevel@tonic-gate 		 * to copy and recirculate the event (for further diagnostics)
2278*0Sstevel@tonic-gate 		 * and we should not proceed to log it here.
2279*0Sstevel@tonic-gate 		 *
2280*0Sstevel@tonic-gate 		 * This must be the last step here before the cpu_log_err()
2281*0Sstevel@tonic-gate 		 * below - if an event recirculates cpu_ce_log_err() will
2282*0Sstevel@tonic-gate 		 * not call the current function but just proceed directly
2283*0Sstevel@tonic-gate 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2284*0Sstevel@tonic-gate 		 *
2285*0Sstevel@tonic-gate 		 * Note: Check cpu_impl_async_log_err if changing this
2286*0Sstevel@tonic-gate 		 */
2287*0Sstevel@tonic-gate 		if (pp) {
2288*0Sstevel@tonic-gate 			if (page_isretired(pp) || page_deteriorating(pp)) {
2289*0Sstevel@tonic-gate 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2290*0Sstevel@tonic-gate 				    CE_XDIAG_SKIP_PAGEDET);
2291*0Sstevel@tonic-gate 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2292*0Sstevel@tonic-gate 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2293*0Sstevel@tonic-gate 				return (0);
2294*0Sstevel@tonic-gate 			}
2295*0Sstevel@tonic-gate 		} else {
2296*0Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2297*0Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPP);
2298*0Sstevel@tonic-gate 		}
2299*0Sstevel@tonic-gate 		/*FALLTHRU*/
2300*0Sstevel@tonic-gate 
2301*0Sstevel@tonic-gate 	/*
2302*0Sstevel@tonic-gate 	 * Cases where we just want to report the error and continue.
2303*0Sstevel@tonic-gate 	 */
2304*0Sstevel@tonic-gate 	case CPU_CE_ECACHE:
2305*0Sstevel@tonic-gate 	case CPU_UE_ECACHE:
2306*0Sstevel@tonic-gate 	case CPU_IV:
2307*0Sstevel@tonic-gate 	case CPU_ORPH:
2308*0Sstevel@tonic-gate 		cpu_log_err(aflt);
2309*0Sstevel@tonic-gate 		return (1);
2310*0Sstevel@tonic-gate 
2311*0Sstevel@tonic-gate 	/*
2312*0Sstevel@tonic-gate 	 * Cases where we want to fall through to handle panicking.
2313*0Sstevel@tonic-gate 	 */
2314*0Sstevel@tonic-gate 	case CPU_UE:
2315*0Sstevel@tonic-gate 		/*
2316*0Sstevel@tonic-gate 		 * We want to skip logging in the same conditions as the
2317*0Sstevel@tonic-gate 		 * CE case.  In addition, we want to make sure we're not
2318*0Sstevel@tonic-gate 		 * panicking.
2319*0Sstevel@tonic-gate 		 */
2320*0Sstevel@tonic-gate 		if (!panicstr && (ch_flt->afsr_errs &
2321*0Sstevel@tonic-gate 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2322*0Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
2323*0Sstevel@tonic-gate 			page_t *pp = page_numtopp_nolock((pfn_t)
2324*0Sstevel@tonic-gate 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2325*0Sstevel@tonic-gate 
2326*0Sstevel@tonic-gate 			if (pp != NULL && page_isretired(pp)) {
2327*0Sstevel@tonic-gate 
2328*0Sstevel@tonic-gate 				/* Zero the address to clear the error */
2329*0Sstevel@tonic-gate 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2330*0Sstevel@tonic-gate 				return (0);
2331*0Sstevel@tonic-gate 			}
2332*0Sstevel@tonic-gate 		}
2333*0Sstevel@tonic-gate 		cpu_log_err(aflt);
2334*0Sstevel@tonic-gate 		break;
2335*0Sstevel@tonic-gate 
2336*0Sstevel@tonic-gate 	default:
2337*0Sstevel@tonic-gate 		/*
2338*0Sstevel@tonic-gate 		 * If the us3_common.c code doesn't know the flt_type, it may
2339*0Sstevel@tonic-gate 		 * be an implementation-specific code.  Call into the impldep
2340*0Sstevel@tonic-gate 		 * backend to find out what to do: if it tells us to continue,
2341*0Sstevel@tonic-gate 		 * break and handle as if falling through from a UE; if not,
2342*0Sstevel@tonic-gate 		 * the impldep backend has handled the error and we're done.
2343*0Sstevel@tonic-gate 		 */
2344*0Sstevel@tonic-gate 		switch (cpu_impl_async_log_err(flt, eqep)) {
2345*0Sstevel@tonic-gate 		case CH_ASYNC_LOG_DONE:
2346*0Sstevel@tonic-gate 			return (1);
2347*0Sstevel@tonic-gate 		case CH_ASYNC_LOG_RECIRC:
2348*0Sstevel@tonic-gate 			return (0);
2349*0Sstevel@tonic-gate 		case CH_ASYNC_LOG_CONTINUE:
2350*0Sstevel@tonic-gate 			break; /* continue on to handle UE-like error */
2351*0Sstevel@tonic-gate 		default:
2352*0Sstevel@tonic-gate 			cmn_err(CE_WARN, "discarding error 0x%p with "
2353*0Sstevel@tonic-gate 			    "invalid fault type (0x%x)",
2354*0Sstevel@tonic-gate 			    (void *)aflt, ch_flt->flt_type);
2355*0Sstevel@tonic-gate 			return (0);
2356*0Sstevel@tonic-gate 		}
2357*0Sstevel@tonic-gate 	}
2358*0Sstevel@tonic-gate 
2359*0Sstevel@tonic-gate 	/* ... fall through from the UE case */
2360*0Sstevel@tonic-gate 
2361*0Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2362*0Sstevel@tonic-gate 		if (!panicstr) {
2363*0Sstevel@tonic-gate 			cpu_page_retire(ch_flt);
2364*0Sstevel@tonic-gate 		} else {
2365*0Sstevel@tonic-gate 			/*
2366*0Sstevel@tonic-gate 			 * Clear UEs on panic so that we don't
2367*0Sstevel@tonic-gate 			 * get haunted by them during panic or
2368*0Sstevel@tonic-gate 			 * after reboot
2369*0Sstevel@tonic-gate 			 */
2370*0Sstevel@tonic-gate 			cpu_clearphys(aflt);
2371*0Sstevel@tonic-gate 			(void) clear_errors(NULL);
2372*0Sstevel@tonic-gate 		}
2373*0Sstevel@tonic-gate 	}
2374*0Sstevel@tonic-gate 
2375*0Sstevel@tonic-gate 	return (1);
2376*0Sstevel@tonic-gate }
2377*0Sstevel@tonic-gate 
2378*0Sstevel@tonic-gate /*
2379*0Sstevel@tonic-gate  * Retire the bad page that may contain the flushed error.
2380*0Sstevel@tonic-gate  */
2381*0Sstevel@tonic-gate void
2382*0Sstevel@tonic-gate cpu_page_retire(ch_async_flt_t *ch_flt)
2383*0Sstevel@tonic-gate {
2384*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2385*0Sstevel@tonic-gate 	page_t *pp = page_numtopp_nolock(aflt->flt_addr >> MMU_PAGESHIFT);
2386*0Sstevel@tonic-gate 
2387*0Sstevel@tonic-gate 	if (pp != NULL) {
2388*0Sstevel@tonic-gate 		page_settoxic(pp, PAGE_IS_FAULTY);
2389*0Sstevel@tonic-gate 		(void) page_retire(pp, PAGE_IS_TOXIC);
2390*0Sstevel@tonic-gate 	}
2391*0Sstevel@tonic-gate }
2392*0Sstevel@tonic-gate 
2393*0Sstevel@tonic-gate /*
2394*0Sstevel@tonic-gate  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2395*0Sstevel@tonic-gate  * generic event post-processing for correctable and uncorrectable memory,
2396*0Sstevel@tonic-gate  * E$, and MTag errors.  Historically this entry point was used to log bits of
2397*0Sstevel@tonic-gate  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2398*0Sstevel@tonic-gate  * converted into an ereport.  In addition, it transmits the error to any
2399*0Sstevel@tonic-gate  * platform-specific service-processor FRU logging routines, if available.
2400*0Sstevel@tonic-gate  */
2401*0Sstevel@tonic-gate void
2402*0Sstevel@tonic-gate cpu_log_err(struct async_flt *aflt)
2403*0Sstevel@tonic-gate {
2404*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
2405*0Sstevel@tonic-gate 	int len = 0;
2406*0Sstevel@tonic-gate 	int synd_status, synd_code, afar_status;
2407*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2408*0Sstevel@tonic-gate 
2409*0Sstevel@tonic-gate 	/*
2410*0Sstevel@tonic-gate 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2411*0Sstevel@tonic-gate 	 * For Panther, L2$ is not external, so we don't want to
2412*0Sstevel@tonic-gate 	 * generate an E$ unum for those errors.
2413*0Sstevel@tonic-gate 	 */
2414*0Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2415*0Sstevel@tonic-gate 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2416*0Sstevel@tonic-gate 			aflt->flt_status |= ECC_ECACHE;
2417*0Sstevel@tonic-gate 	} else {
2418*0Sstevel@tonic-gate 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2419*0Sstevel@tonic-gate 			aflt->flt_status |= ECC_ECACHE;
2420*0Sstevel@tonic-gate 	}
2421*0Sstevel@tonic-gate 
2422*0Sstevel@tonic-gate 	/*
2423*0Sstevel@tonic-gate 	 * Determine syndrome status.
2424*0Sstevel@tonic-gate 	 */
2425*0Sstevel@tonic-gate 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2426*0Sstevel@tonic-gate 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2427*0Sstevel@tonic-gate 
2428*0Sstevel@tonic-gate 	/*
2429*0Sstevel@tonic-gate 	 * Determine afar status.
2430*0Sstevel@tonic-gate 	 */
2431*0Sstevel@tonic-gate 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2432*0Sstevel@tonic-gate 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2433*0Sstevel@tonic-gate 				ch_flt->flt_bit);
2434*0Sstevel@tonic-gate 	else
2435*0Sstevel@tonic-gate 		afar_status = AFLT_STAT_INVALID;
2436*0Sstevel@tonic-gate 
2437*0Sstevel@tonic-gate 	/*
2438*0Sstevel@tonic-gate 	 * If afar status is not invalid do a unum lookup.
2439*0Sstevel@tonic-gate 	 */
2440*0Sstevel@tonic-gate 	if (afar_status != AFLT_STAT_INVALID) {
2441*0Sstevel@tonic-gate 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2442*0Sstevel@tonic-gate 			UNUM_NAMLEN, &len);
2443*0Sstevel@tonic-gate 	} else {
2444*0Sstevel@tonic-gate 		unum[0] = '\0';
2445*0Sstevel@tonic-gate 	}
2446*0Sstevel@tonic-gate 
2447*0Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status,
2448*0Sstevel@tonic-gate 	    aflt->flt_synd, ch_flt->flt_bit);
2449*0Sstevel@tonic-gate 
2450*0Sstevel@tonic-gate 	/*
2451*0Sstevel@tonic-gate 	 * Do not send the fruid message (plat_ecc_error_data_t)
2452*0Sstevel@tonic-gate 	 * to the SC if it can handle the enhanced error information
2453*0Sstevel@tonic-gate 	 * (plat_ecc_error2_data_t) or when the tunable
2454*0Sstevel@tonic-gate 	 * ecc_log_fruid_enable is set to 0.
2455*0Sstevel@tonic-gate 	 */
2456*0Sstevel@tonic-gate 
2457*0Sstevel@tonic-gate 	if (&plat_ecc_capability_sc_get &&
2458*0Sstevel@tonic-gate 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2459*0Sstevel@tonic-gate 		if (&plat_log_fruid_error)
2460*0Sstevel@tonic-gate 			plat_log_fruid_error(synd_code, aflt, unum,
2461*0Sstevel@tonic-gate 			    ch_flt->flt_bit);
2462*0Sstevel@tonic-gate 	}
2463*0Sstevel@tonic-gate 
2464*0Sstevel@tonic-gate 	if (aflt->flt_func != NULL)
2465*0Sstevel@tonic-gate 		aflt->flt_func(aflt, unum);
2466*0Sstevel@tonic-gate 
2467*0Sstevel@tonic-gate 	if (afar_status != AFLT_STAT_INVALID)
2468*0Sstevel@tonic-gate 		cpu_log_diag_info(ch_flt);
2469*0Sstevel@tonic-gate 
2470*0Sstevel@tonic-gate 	/*
2471*0Sstevel@tonic-gate 	 * If we have a CEEN error , we do not reenable CEEN until after
2472*0Sstevel@tonic-gate 	 * we exit the trap handler. Otherwise, another error may
2473*0Sstevel@tonic-gate 	 * occur causing the handler to be entered recursively.
2474*0Sstevel@tonic-gate 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2475*0Sstevel@tonic-gate 	 * to try and ensure that the CPU makes progress in the face
2476*0Sstevel@tonic-gate 	 * of a CE storm.
2477*0Sstevel@tonic-gate 	 */
2478*0Sstevel@tonic-gate 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2479*0Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors,
2480*0Sstevel@tonic-gate 		    (void *)aflt->flt_inst,
2481*0Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2482*0Sstevel@tonic-gate 	}
2483*0Sstevel@tonic-gate }
2484*0Sstevel@tonic-gate 
2485*0Sstevel@tonic-gate /*
2486*0Sstevel@tonic-gate  * Invoked by error_init() early in startup and therefore before
2487*0Sstevel@tonic-gate  * startup_errorq() is called to drain any error Q -
2488*0Sstevel@tonic-gate  *
2489*0Sstevel@tonic-gate  * startup()
2490*0Sstevel@tonic-gate  *   startup_end()
2491*0Sstevel@tonic-gate  *     error_init()
2492*0Sstevel@tonic-gate  *       cpu_error_init()
2493*0Sstevel@tonic-gate  * errorq_init()
2494*0Sstevel@tonic-gate  *   errorq_drain()
2495*0Sstevel@tonic-gate  * start_other_cpus()
2496*0Sstevel@tonic-gate  *
2497*0Sstevel@tonic-gate  * The purpose of this routine is to create error-related taskqs.  Taskqs
2498*0Sstevel@tonic-gate  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2499*0Sstevel@tonic-gate  * context.
2500*0Sstevel@tonic-gate  */
2501*0Sstevel@tonic-gate void
2502*0Sstevel@tonic-gate cpu_error_init(int items)
2503*0Sstevel@tonic-gate {
2504*0Sstevel@tonic-gate 	/*
2505*0Sstevel@tonic-gate 	 * Create taskq(s) to reenable CE
2506*0Sstevel@tonic-gate 	 */
2507*0Sstevel@tonic-gate 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2508*0Sstevel@tonic-gate 	    items, items, TASKQ_PREPOPULATE);
2509*0Sstevel@tonic-gate }
2510*0Sstevel@tonic-gate 
2511*0Sstevel@tonic-gate void
2512*0Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2513*0Sstevel@tonic-gate {
2514*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
2515*0Sstevel@tonic-gate 	int len;
2516*0Sstevel@tonic-gate 
2517*0Sstevel@tonic-gate 	switch (aflt->flt_class) {
2518*0Sstevel@tonic-gate 	case CPU_FAULT:
2519*0Sstevel@tonic-gate 		cpu_ereport_init(aflt);
2520*0Sstevel@tonic-gate 		if (cpu_async_log_err(aflt, eqep))
2521*0Sstevel@tonic-gate 			cpu_ereport_post(aflt);
2522*0Sstevel@tonic-gate 		break;
2523*0Sstevel@tonic-gate 
2524*0Sstevel@tonic-gate 	case BUS_FAULT:
2525*0Sstevel@tonic-gate 		if (aflt->flt_func != NULL) {
2526*0Sstevel@tonic-gate 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2527*0Sstevel@tonic-gate 			    unum, UNUM_NAMLEN, &len);
2528*0Sstevel@tonic-gate 			aflt->flt_func(aflt, unum);
2529*0Sstevel@tonic-gate 		}
2530*0Sstevel@tonic-gate 		break;
2531*0Sstevel@tonic-gate 
2532*0Sstevel@tonic-gate 	case RECIRC_CPU_FAULT:
2533*0Sstevel@tonic-gate 		aflt->flt_class = CPU_FAULT;
2534*0Sstevel@tonic-gate 		cpu_log_err(aflt);
2535*0Sstevel@tonic-gate 		cpu_ereport_post(aflt);
2536*0Sstevel@tonic-gate 		break;
2537*0Sstevel@tonic-gate 
2538*0Sstevel@tonic-gate 	case RECIRC_BUS_FAULT:
2539*0Sstevel@tonic-gate 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2540*0Sstevel@tonic-gate 		/*FALLTHRU*/
2541*0Sstevel@tonic-gate 	default:
2542*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2543*0Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2544*0Sstevel@tonic-gate 		return;
2545*0Sstevel@tonic-gate 	}
2546*0Sstevel@tonic-gate }
2547*0Sstevel@tonic-gate 
2548*0Sstevel@tonic-gate /*
2549*0Sstevel@tonic-gate  * Scrub and classify a CE.  This function must not modify the
2550*0Sstevel@tonic-gate  * fault structure passed to it but instead should return the classification
2551*0Sstevel@tonic-gate  * information.
2552*0Sstevel@tonic-gate  */
2553*0Sstevel@tonic-gate 
2554*0Sstevel@tonic-gate static uchar_t
2555*0Sstevel@tonic-gate cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2556*0Sstevel@tonic-gate {
2557*0Sstevel@tonic-gate 	uchar_t disp = CE_XDIAG_EXTALG;
2558*0Sstevel@tonic-gate 	on_trap_data_t otd;
2559*0Sstevel@tonic-gate 	uint64_t orig_err;
2560*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
2561*0Sstevel@tonic-gate 
2562*0Sstevel@tonic-gate 	/*
2563*0Sstevel@tonic-gate 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2564*0Sstevel@tonic-gate 	 * this, but our other callers have not.  Disable preemption to
2565*0Sstevel@tonic-gate 	 * avoid CPU migration so that we restore CEEN on the correct
2566*0Sstevel@tonic-gate 	 * cpu later.
2567*0Sstevel@tonic-gate 	 *
2568*0Sstevel@tonic-gate 	 * CEEN is cleared so that further CEs that our instruction and
2569*0Sstevel@tonic-gate 	 * data footprint induce do not cause use to either creep down
2570*0Sstevel@tonic-gate 	 * kernel stack to the point of overflow, or do so much CE
2571*0Sstevel@tonic-gate 	 * notification as to make little real forward progress.
2572*0Sstevel@tonic-gate 	 *
2573*0Sstevel@tonic-gate 	 * NCEEN must not be cleared.  However it is possible that
2574*0Sstevel@tonic-gate 	 * our accesses to the flt_addr may provoke a bus error or timeout
2575*0Sstevel@tonic-gate 	 * if the offending address has just been unconfigured as part of
2576*0Sstevel@tonic-gate 	 * a DR action.  So we must operate under on_trap protection.
2577*0Sstevel@tonic-gate 	 */
2578*0Sstevel@tonic-gate 	kpreempt_disable();
2579*0Sstevel@tonic-gate 	orig_err = get_error_enable();
2580*0Sstevel@tonic-gate 	if (orig_err & EN_REG_CEEN)
2581*0Sstevel@tonic-gate 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2582*0Sstevel@tonic-gate 
2583*0Sstevel@tonic-gate 	/*
2584*0Sstevel@tonic-gate 	 * Our classification algorithm includes the line state before
2585*0Sstevel@tonic-gate 	 * the scrub; we'd like this captured after the detection and
2586*0Sstevel@tonic-gate 	 * before the algorithm below - the earlier the better.
2587*0Sstevel@tonic-gate 	 *
2588*0Sstevel@tonic-gate 	 * If we've come from a cpu CE trap then this info already exists
2589*0Sstevel@tonic-gate 	 * in the cpu logout area.
2590*0Sstevel@tonic-gate 	 *
2591*0Sstevel@tonic-gate 	 * For a CE detected by memscrub for which there was no trap
2592*0Sstevel@tonic-gate 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2593*0Sstevel@tonic-gate 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2594*0Sstevel@tonic-gate 	 * marked the fault structure as incomplete as a flag to later
2595*0Sstevel@tonic-gate 	 * logging code.
2596*0Sstevel@tonic-gate 	 *
2597*0Sstevel@tonic-gate 	 * If called directly from an IO detected CE there has been
2598*0Sstevel@tonic-gate 	 * no line data capture.  In this case we logout to the cpu logout
2599*0Sstevel@tonic-gate 	 * area - that's appropriate since it's the cpu cache data we need
2600*0Sstevel@tonic-gate 	 * for classification.  We thus borrow the cpu logout area for a
2601*0Sstevel@tonic-gate 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2602*0Sstevel@tonic-gate 	 * this time (we will invalidate it again below).
2603*0Sstevel@tonic-gate 	 *
2604*0Sstevel@tonic-gate 	 * If called from the partner check xcall handler then this cpu
2605*0Sstevel@tonic-gate 	 * (the partner) has not necessarily experienced a CE at this
2606*0Sstevel@tonic-gate 	 * address.  But we want to capture line state before its scrub
2607*0Sstevel@tonic-gate 	 * attempt since we use that in our classification.
2608*0Sstevel@tonic-gate 	 */
2609*0Sstevel@tonic-gate 	if (logout_tried == B_FALSE) {
2610*0Sstevel@tonic-gate 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2611*0Sstevel@tonic-gate 			disp |= CE_XDIAG_NOLOGOUT;
2612*0Sstevel@tonic-gate 	}
2613*0Sstevel@tonic-gate 
2614*0Sstevel@tonic-gate 	/*
2615*0Sstevel@tonic-gate 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2616*0Sstevel@tonic-gate 	 * no longer be valid (if DR'd since the initial event) so we
2617*0Sstevel@tonic-gate 	 * perform this scrub under on_trap protection.  If this access is
2618*0Sstevel@tonic-gate 	 * ok then further accesses below will also be ok - DR cannot
2619*0Sstevel@tonic-gate 	 * proceed while this thread is active (preemption is disabled);
2620*0Sstevel@tonic-gate 	 * to be safe we'll nonetheless use on_trap again below.
2621*0Sstevel@tonic-gate 	 */
2622*0Sstevel@tonic-gate 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2623*0Sstevel@tonic-gate 		cpu_scrubphys(ecc);
2624*0Sstevel@tonic-gate 	} else {
2625*0Sstevel@tonic-gate 		no_trap();
2626*0Sstevel@tonic-gate 		if (orig_err & EN_REG_CEEN)
2627*0Sstevel@tonic-gate 		    set_error_enable(orig_err);
2628*0Sstevel@tonic-gate 		kpreempt_enable();
2629*0Sstevel@tonic-gate 		return (disp);
2630*0Sstevel@tonic-gate 	}
2631*0Sstevel@tonic-gate 	no_trap();
2632*0Sstevel@tonic-gate 
2633*0Sstevel@tonic-gate 	/*
2634*0Sstevel@tonic-gate 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2635*0Sstevel@tonic-gate 	 * Note that it's quite possible that the read sourced the data from
2636*0Sstevel@tonic-gate 	 * another cpu.
2637*0Sstevel@tonic-gate 	 */
2638*0Sstevel@tonic-gate 	if (clear_ecc(ecc))
2639*0Sstevel@tonic-gate 		disp |= CE_XDIAG_CE1;
2640*0Sstevel@tonic-gate 
2641*0Sstevel@tonic-gate 	/*
2642*0Sstevel@tonic-gate 	 * Read the data again.  This time the read is very likely to
2643*0Sstevel@tonic-gate 	 * come from memory since the scrub induced a writeback to memory.
2644*0Sstevel@tonic-gate 	 */
2645*0Sstevel@tonic-gate 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2646*0Sstevel@tonic-gate 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2647*0Sstevel@tonic-gate 	} else {
2648*0Sstevel@tonic-gate 		no_trap();
2649*0Sstevel@tonic-gate 		if (orig_err & EN_REG_CEEN)
2650*0Sstevel@tonic-gate 		    set_error_enable(orig_err);
2651*0Sstevel@tonic-gate 		kpreempt_enable();
2652*0Sstevel@tonic-gate 		return (disp);
2653*0Sstevel@tonic-gate 	}
2654*0Sstevel@tonic-gate 	no_trap();
2655*0Sstevel@tonic-gate 
2656*0Sstevel@tonic-gate 	/* Did that read induce a CE that matches the AFAR? */
2657*0Sstevel@tonic-gate 	if (clear_ecc(ecc))
2658*0Sstevel@tonic-gate 		disp |= CE_XDIAG_CE2;
2659*0Sstevel@tonic-gate 
2660*0Sstevel@tonic-gate 	/*
2661*0Sstevel@tonic-gate 	 * Look at the logout information and record whether we found the
2662*0Sstevel@tonic-gate 	 * line in l2/l3 cache.  For Panther we are interested in whether
2663*0Sstevel@tonic-gate 	 * we found it in either cache (it won't reside in both but
2664*0Sstevel@tonic-gate 	 * it is possible to read it that way given the moving target).
2665*0Sstevel@tonic-gate 	 */
2666*0Sstevel@tonic-gate 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2667*0Sstevel@tonic-gate 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2668*0Sstevel@tonic-gate 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2669*0Sstevel@tonic-gate 		int hit, level;
2670*0Sstevel@tonic-gate 		int state;
2671*0Sstevel@tonic-gate 		int totalsize;
2672*0Sstevel@tonic-gate 		ch_ec_data_t *ecp;
2673*0Sstevel@tonic-gate 
2674*0Sstevel@tonic-gate 		/*
2675*0Sstevel@tonic-gate 		 * If hit is nonzero then a match was found and hit will
2676*0Sstevel@tonic-gate 		 * be one greater than the index which hit.  For Panther we
2677*0Sstevel@tonic-gate 		 * also need to pay attention to level to see which of l2$ or
2678*0Sstevel@tonic-gate 		 * l3$ it hit in.
2679*0Sstevel@tonic-gate 		 */
2680*0Sstevel@tonic-gate 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2681*0Sstevel@tonic-gate 		    0, &level);
2682*0Sstevel@tonic-gate 
2683*0Sstevel@tonic-gate 		if (hit) {
2684*0Sstevel@tonic-gate 			--hit;
2685*0Sstevel@tonic-gate 			disp |= CE_XDIAG_AFARMATCH;
2686*0Sstevel@tonic-gate 
2687*0Sstevel@tonic-gate 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2688*0Sstevel@tonic-gate 				if (level == 2)
2689*0Sstevel@tonic-gate 					ecp = &clop->clo_data.chd_l2_data[hit];
2690*0Sstevel@tonic-gate 				else
2691*0Sstevel@tonic-gate 					ecp = &clop->clo_data.chd_ec_data[hit];
2692*0Sstevel@tonic-gate 			} else {
2693*0Sstevel@tonic-gate 				ASSERT(level == 2);
2694*0Sstevel@tonic-gate 				ecp = &clop->clo_data.chd_ec_data[hit];
2695*0Sstevel@tonic-gate 			}
2696*0Sstevel@tonic-gate 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2697*0Sstevel@tonic-gate 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2698*0Sstevel@tonic-gate 			    ecc->flt_addr, ecp->ec_tag);
2699*0Sstevel@tonic-gate 
2700*0Sstevel@tonic-gate 			/*
2701*0Sstevel@tonic-gate 			 * Cheetah variants use different state encodings -
2702*0Sstevel@tonic-gate 			 * the CH_ECSTATE_* defines vary depending on the
2703*0Sstevel@tonic-gate 			 * module we're compiled for.  Translate into our
2704*0Sstevel@tonic-gate 			 * one true version.  Conflate Owner-Shared state
2705*0Sstevel@tonic-gate 			 * of SSM mode with Owner as victimisation of such
2706*0Sstevel@tonic-gate 			 * lines may cause a writeback.
2707*0Sstevel@tonic-gate 			 */
2708*0Sstevel@tonic-gate 			switch (state) {
2709*0Sstevel@tonic-gate 			case CH_ECSTATE_MOD:
2710*0Sstevel@tonic-gate 				disp |= EC_STATE_M;
2711*0Sstevel@tonic-gate 				break;
2712*0Sstevel@tonic-gate 
2713*0Sstevel@tonic-gate 			case CH_ECSTATE_OWN:
2714*0Sstevel@tonic-gate 			case CH_ECSTATE_OWS:
2715*0Sstevel@tonic-gate 				disp |= EC_STATE_O;
2716*0Sstevel@tonic-gate 				break;
2717*0Sstevel@tonic-gate 
2718*0Sstevel@tonic-gate 			case CH_ECSTATE_EXL:
2719*0Sstevel@tonic-gate 				disp |= EC_STATE_E;
2720*0Sstevel@tonic-gate 				break;
2721*0Sstevel@tonic-gate 
2722*0Sstevel@tonic-gate 			case CH_ECSTATE_SHR:
2723*0Sstevel@tonic-gate 				disp |= EC_STATE_S;
2724*0Sstevel@tonic-gate 				break;
2725*0Sstevel@tonic-gate 
2726*0Sstevel@tonic-gate 			default:
2727*0Sstevel@tonic-gate 				disp |= EC_STATE_I;
2728*0Sstevel@tonic-gate 				break;
2729*0Sstevel@tonic-gate 			}
2730*0Sstevel@tonic-gate 		}
2731*0Sstevel@tonic-gate 
2732*0Sstevel@tonic-gate 		/*
2733*0Sstevel@tonic-gate 		 * If we initiated the delayed logout then we are responsible
2734*0Sstevel@tonic-gate 		 * for invalidating the logout area.
2735*0Sstevel@tonic-gate 		 */
2736*0Sstevel@tonic-gate 		if (logout_tried == B_FALSE) {
2737*0Sstevel@tonic-gate 			bzero(clop, sizeof (ch_cpu_logout_t));
2738*0Sstevel@tonic-gate 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2739*0Sstevel@tonic-gate 		}
2740*0Sstevel@tonic-gate 	}
2741*0Sstevel@tonic-gate 
2742*0Sstevel@tonic-gate 	/*
2743*0Sstevel@tonic-gate 	 * Re-enable CEEN if we turned it off.
2744*0Sstevel@tonic-gate 	 */
2745*0Sstevel@tonic-gate 	if (orig_err & EN_REG_CEEN)
2746*0Sstevel@tonic-gate 	    set_error_enable(orig_err);
2747*0Sstevel@tonic-gate 	kpreempt_enable();
2748*0Sstevel@tonic-gate 
2749*0Sstevel@tonic-gate 	return (disp);
2750*0Sstevel@tonic-gate }
2751*0Sstevel@tonic-gate 
2752*0Sstevel@tonic-gate /*
2753*0Sstevel@tonic-gate  * Scrub a correctable memory error and collect data for classification
2754*0Sstevel@tonic-gate  * of CE type.  This function is called in the detection path, ie tl0 handling
2755*0Sstevel@tonic-gate  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2756*0Sstevel@tonic-gate  */
2757*0Sstevel@tonic-gate void
2758*0Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2759*0Sstevel@tonic-gate {
2760*0Sstevel@tonic-gate 	/*
2761*0Sstevel@tonic-gate 	 * Cheetah CE classification does not set any bits in flt_status.
2762*0Sstevel@tonic-gate 	 * Instead we will record classification datapoints in flt_disp.
2763*0Sstevel@tonic-gate 	 */
2764*0Sstevel@tonic-gate 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2765*0Sstevel@tonic-gate 
2766*0Sstevel@tonic-gate 	/*
2767*0Sstevel@tonic-gate 	 * To check if the error detected by IO is persistent, sticky or
2768*0Sstevel@tonic-gate 	 * intermittent.  This is noticed by clear_ecc().
2769*0Sstevel@tonic-gate 	 */
2770*0Sstevel@tonic-gate 	if (ecc->flt_status & ECC_IOBUS)
2771*0Sstevel@tonic-gate 		ecc->flt_stat = C_AFSR_MEMORY;
2772*0Sstevel@tonic-gate 
2773*0Sstevel@tonic-gate 	/*
2774*0Sstevel@tonic-gate 	 * Record information from this first part of the algorithm in
2775*0Sstevel@tonic-gate 	 * flt_disp.
2776*0Sstevel@tonic-gate 	 */
2777*0Sstevel@tonic-gate 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2778*0Sstevel@tonic-gate }
2779*0Sstevel@tonic-gate 
2780*0Sstevel@tonic-gate /*
2781*0Sstevel@tonic-gate  * Select a partner to perform a further CE classification check from.
2782*0Sstevel@tonic-gate  * Must be called with kernel preemption disabled (to stop the cpu list
2783*0Sstevel@tonic-gate  * from changing).  The detecting cpu we are partnering has cpuid
2784*0Sstevel@tonic-gate  * aflt->flt_inst; we might not be running on the detecting cpu.
2785*0Sstevel@tonic-gate  *
2786*0Sstevel@tonic-gate  * Restrict choice to active cpus in the same cpu partition as ourselves in
2787*0Sstevel@tonic-gate  * an effort to stop bad cpus in one partition causing other partitions to
2788*0Sstevel@tonic-gate  * perform excessive diagnostic activity.  Actually since the errorq drain
2789*0Sstevel@tonic-gate  * is run from a softint most of the time and that is a global mechanism
2790*0Sstevel@tonic-gate  * this isolation is only partial.  Return NULL if we fail to find a
2791*0Sstevel@tonic-gate  * suitable partner.
2792*0Sstevel@tonic-gate  *
2793*0Sstevel@tonic-gate  * We prefer a partner that is in a different latency group to ourselves as
2794*0Sstevel@tonic-gate  * we will share fewer datapaths.  If such a partner is unavailable then
2795*0Sstevel@tonic-gate  * choose one in the same lgroup but prefer a different chip and only allow
2796*0Sstevel@tonic-gate  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2797*0Sstevel@tonic-gate  * flags includes PTNR_SELFOK then permit selection of the original detector.
2798*0Sstevel@tonic-gate  *
2799*0Sstevel@tonic-gate  * We keep a cache of the last partner selected for a cpu, and we'll try to
2800*0Sstevel@tonic-gate  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2801*0Sstevel@tonic-gate  * have passed since that selection was made.  This provides the benefit
2802*0Sstevel@tonic-gate  * of the point-of-view of different partners over time but without
2803*0Sstevel@tonic-gate  * requiring frequent cpu list traversals.
2804*0Sstevel@tonic-gate  */
2805*0Sstevel@tonic-gate 
2806*0Sstevel@tonic-gate #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2807*0Sstevel@tonic-gate #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2808*0Sstevel@tonic-gate 
2809*0Sstevel@tonic-gate static cpu_t *
2810*0Sstevel@tonic-gate ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2811*0Sstevel@tonic-gate {
2812*0Sstevel@tonic-gate 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2813*0Sstevel@tonic-gate 	hrtime_t lasttime, thistime;
2814*0Sstevel@tonic-gate 
2815*0Sstevel@tonic-gate 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2816*0Sstevel@tonic-gate 
2817*0Sstevel@tonic-gate 	dtcr = cpu[aflt->flt_inst];
2818*0Sstevel@tonic-gate 
2819*0Sstevel@tonic-gate 	/*
2820*0Sstevel@tonic-gate 	 * Short-circuit for the following cases:
2821*0Sstevel@tonic-gate 	 *	. the dtcr is not flagged active
2822*0Sstevel@tonic-gate 	 *	. there is just one cpu present
2823*0Sstevel@tonic-gate 	 *	. the detector has disappeared
2824*0Sstevel@tonic-gate 	 *	. we were given a bad flt_inst cpuid; this should not happen
2825*0Sstevel@tonic-gate 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2826*0Sstevel@tonic-gate 	 *	  reason to panic.
2827*0Sstevel@tonic-gate 	 *	. there is just one cpu left online in the cpu partition
2828*0Sstevel@tonic-gate 	 *
2829*0Sstevel@tonic-gate 	 * If we return NULL after this point then we do not update the
2830*0Sstevel@tonic-gate 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2831*0Sstevel@tonic-gate 	 * again next time; this is the case where the only other cpu online
2832*0Sstevel@tonic-gate 	 * in the detector's partition is on the same chip as the detector
2833*0Sstevel@tonic-gate 	 * and since CEEN re-enable is throttled even that case should not
2834*0Sstevel@tonic-gate 	 * hurt performance.
2835*0Sstevel@tonic-gate 	 */
2836*0Sstevel@tonic-gate 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2837*0Sstevel@tonic-gate 		return (NULL);
2838*0Sstevel@tonic-gate 	}
2839*0Sstevel@tonic-gate 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2840*0Sstevel@tonic-gate 		if (flags & PTNR_SELFOK) {
2841*0Sstevel@tonic-gate 			*typep = CE_XDIAG_PTNR_SELF;
2842*0Sstevel@tonic-gate 			return (dtcr);
2843*0Sstevel@tonic-gate 		} else {
2844*0Sstevel@tonic-gate 			return (NULL);
2845*0Sstevel@tonic-gate 		}
2846*0Sstevel@tonic-gate 	}
2847*0Sstevel@tonic-gate 
2848*0Sstevel@tonic-gate 	thistime = gethrtime();
2849*0Sstevel@tonic-gate 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2850*0Sstevel@tonic-gate 
2851*0Sstevel@tonic-gate 	/*
2852*0Sstevel@tonic-gate 	 * Select a starting point.
2853*0Sstevel@tonic-gate 	 */
2854*0Sstevel@tonic-gate 	if (!lasttime) {
2855*0Sstevel@tonic-gate 		/*
2856*0Sstevel@tonic-gate 		 * We've never selected a partner for this detector before.
2857*0Sstevel@tonic-gate 		 * Start the scan at the next online cpu in the same cpu
2858*0Sstevel@tonic-gate 		 * partition.
2859*0Sstevel@tonic-gate 		 */
2860*0Sstevel@tonic-gate 		sp = dtcr->cpu_next_part;
2861*0Sstevel@tonic-gate 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2862*0Sstevel@tonic-gate 		/*
2863*0Sstevel@tonic-gate 		 * Our last selection has not aged yet.  If this partner:
2864*0Sstevel@tonic-gate 		 *	. is still a valid cpu,
2865*0Sstevel@tonic-gate 		 *	. is still in the same partition as the detector
2866*0Sstevel@tonic-gate 		 *	. is still marked active
2867*0Sstevel@tonic-gate 		 *	. satisfies the 'flags' argument criteria
2868*0Sstevel@tonic-gate 		 * then select it again without updating the timestamp.
2869*0Sstevel@tonic-gate 		 */
2870*0Sstevel@tonic-gate 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2871*0Sstevel@tonic-gate 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2872*0Sstevel@tonic-gate 		    !cpu_flagged_active(sp->cpu_flags) ||
2873*0Sstevel@tonic-gate 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2874*0Sstevel@tonic-gate 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2875*0Sstevel@tonic-gate 		    !(flags & PTNR_SIBLINGOK))) {
2876*0Sstevel@tonic-gate 			sp = dtcr->cpu_next_part;
2877*0Sstevel@tonic-gate 		} else {
2878*0Sstevel@tonic-gate 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2879*0Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_REMOTE;
2880*0Sstevel@tonic-gate 			} else if (sp == dtcr) {
2881*0Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_SELF;
2882*0Sstevel@tonic-gate 			} else if (sp->cpu_chip->chip_id ==
2883*0Sstevel@tonic-gate 			    dtcr->cpu_chip->chip_id) {
2884*0Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_SIBLING;
2885*0Sstevel@tonic-gate 			} else {
2886*0Sstevel@tonic-gate 				*typep = CE_XDIAG_PTNR_LOCAL;
2887*0Sstevel@tonic-gate 			}
2888*0Sstevel@tonic-gate 			return (sp);
2889*0Sstevel@tonic-gate 		}
2890*0Sstevel@tonic-gate 	} else {
2891*0Sstevel@tonic-gate 		/*
2892*0Sstevel@tonic-gate 		 * Our last selection has aged.  If it is nonetheless still a
2893*0Sstevel@tonic-gate 		 * valid cpu then start the scan at the next cpu in the
2894*0Sstevel@tonic-gate 		 * partition after our last partner.  If the last selection
2895*0Sstevel@tonic-gate 		 * is no longer a valid cpu then go with our default.  In
2896*0Sstevel@tonic-gate 		 * this way we slowly cycle through possible partners to
2897*0Sstevel@tonic-gate 		 * obtain multiple viewpoints over time.
2898*0Sstevel@tonic-gate 		 */
2899*0Sstevel@tonic-gate 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2900*0Sstevel@tonic-gate 		if (sp == NULL) {
2901*0Sstevel@tonic-gate 			sp = dtcr->cpu_next_part;
2902*0Sstevel@tonic-gate 		} else {
2903*0Sstevel@tonic-gate 			sp = sp->cpu_next_part;		/* may be dtcr */
2904*0Sstevel@tonic-gate 			if (sp->cpu_part != dtcr->cpu_part)
2905*0Sstevel@tonic-gate 				sp = dtcr;
2906*0Sstevel@tonic-gate 		}
2907*0Sstevel@tonic-gate 	}
2908*0Sstevel@tonic-gate 
2909*0Sstevel@tonic-gate 	/*
2910*0Sstevel@tonic-gate 	 * We have a proposed starting point for our search, but if this
2911*0Sstevel@tonic-gate 	 * cpu is offline then its cpu_next_part will point to itself
2912*0Sstevel@tonic-gate 	 * so we can't use that to iterate over cpus in this partition in
2913*0Sstevel@tonic-gate 	 * the loop below.  We still want to avoid iterating over cpus not
2914*0Sstevel@tonic-gate 	 * in our partition, so in the case that our starting point is offline
2915*0Sstevel@tonic-gate 	 * we will repoint it to be the detector itself;  and if the detector
2916*0Sstevel@tonic-gate 	 * happens to be offline we'll return NULL from the following loop.
2917*0Sstevel@tonic-gate 	 */
2918*0Sstevel@tonic-gate 	if (!cpu_flagged_active(sp->cpu_flags)) {
2919*0Sstevel@tonic-gate 		sp = dtcr;
2920*0Sstevel@tonic-gate 	}
2921*0Sstevel@tonic-gate 
2922*0Sstevel@tonic-gate 	ptnr = sp;
2923*0Sstevel@tonic-gate 	locptnr = NULL;
2924*0Sstevel@tonic-gate 	sibptnr = NULL;
2925*0Sstevel@tonic-gate 	do {
2926*0Sstevel@tonic-gate 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2927*0Sstevel@tonic-gate 			continue;
2928*0Sstevel@tonic-gate 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2929*0Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2930*0Sstevel@tonic-gate 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2931*0Sstevel@tonic-gate 			*typep = CE_XDIAG_PTNR_REMOTE;
2932*0Sstevel@tonic-gate 			return (ptnr);
2933*0Sstevel@tonic-gate 		}
2934*0Sstevel@tonic-gate 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2935*0Sstevel@tonic-gate 			if (sibptnr == NULL)
2936*0Sstevel@tonic-gate 				sibptnr = ptnr;
2937*0Sstevel@tonic-gate 			continue;
2938*0Sstevel@tonic-gate 		}
2939*0Sstevel@tonic-gate 		if (locptnr == NULL)
2940*0Sstevel@tonic-gate 			locptnr = ptnr;
2941*0Sstevel@tonic-gate 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2942*0Sstevel@tonic-gate 
2943*0Sstevel@tonic-gate 	/*
2944*0Sstevel@tonic-gate 	 * A foreign partner has already been returned if one was available.
2945*0Sstevel@tonic-gate 	 *
2946*0Sstevel@tonic-gate 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2947*0Sstevel@tonic-gate 	 * detector, is active, and is not a sibling of the detector.
2948*0Sstevel@tonic-gate 	 *
2949*0Sstevel@tonic-gate 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2950*0Sstevel@tonic-gate 	 * active.
2951*0Sstevel@tonic-gate 	 *
2952*0Sstevel@tonic-gate 	 * If we have to resort to using the detector itself we have already
2953*0Sstevel@tonic-gate 	 * checked that it is active.
2954*0Sstevel@tonic-gate 	 */
2955*0Sstevel@tonic-gate 	if (locptnr) {
2956*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2957*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2958*0Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_LOCAL;
2959*0Sstevel@tonic-gate 		return (locptnr);
2960*0Sstevel@tonic-gate 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2961*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2962*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2963*0Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SIBLING;
2964*0Sstevel@tonic-gate 		return (sibptnr);
2965*0Sstevel@tonic-gate 	} else if (flags & PTNR_SELFOK) {
2966*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2967*0Sstevel@tonic-gate 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2968*0Sstevel@tonic-gate 		*typep = CE_XDIAG_PTNR_SELF;
2969*0Sstevel@tonic-gate 		return (dtcr);
2970*0Sstevel@tonic-gate 	}
2971*0Sstevel@tonic-gate 
2972*0Sstevel@tonic-gate 	return (NULL);
2973*0Sstevel@tonic-gate }
2974*0Sstevel@tonic-gate 
2975*0Sstevel@tonic-gate /*
2976*0Sstevel@tonic-gate  * Cross call handler that is requested to run on the designated partner of
2977*0Sstevel@tonic-gate  * a cpu that experienced a possibly sticky or possibly persistnet CE.
2978*0Sstevel@tonic-gate  */
2979*0Sstevel@tonic-gate static void
2980*0Sstevel@tonic-gate ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2981*0Sstevel@tonic-gate {
2982*0Sstevel@tonic-gate 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2983*0Sstevel@tonic-gate }
2984*0Sstevel@tonic-gate 
2985*0Sstevel@tonic-gate /*
2986*0Sstevel@tonic-gate  * The associated errorqs are never destroyed so we do not need to deal with
2987*0Sstevel@tonic-gate  * them disappearing before this timeout fires.  If the affected memory
2988*0Sstevel@tonic-gate  * has been DR'd out since the original event the scrub algrithm will catch
2989*0Sstevel@tonic-gate  * any errors and return null disposition info.  If the original detecting
2990*0Sstevel@tonic-gate  * cpu has been DR'd out then ereport detector info will not be able to
2991*0Sstevel@tonic-gate  * lookup CPU type;  with a small timeout this is unlikely.
2992*0Sstevel@tonic-gate  */
2993*0Sstevel@tonic-gate static void
2994*0Sstevel@tonic-gate ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
2995*0Sstevel@tonic-gate {
2996*0Sstevel@tonic-gate 	struct async_flt *aflt = cbarg->lkycb_aflt;
2997*0Sstevel@tonic-gate 	uchar_t disp;
2998*0Sstevel@tonic-gate 	cpu_t *cp;
2999*0Sstevel@tonic-gate 	int ptnrtype;
3000*0Sstevel@tonic-gate 
3001*0Sstevel@tonic-gate 	kpreempt_disable();
3002*0Sstevel@tonic-gate 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3003*0Sstevel@tonic-gate 	    &ptnrtype)) {
3004*0Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3005*0Sstevel@tonic-gate 		    (uint64_t)&disp);
3006*0Sstevel@tonic-gate 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3007*0Sstevel@tonic-gate 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3008*0Sstevel@tonic-gate 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3009*0Sstevel@tonic-gate 	} else {
3010*0Sstevel@tonic-gate 		ce_xdiag_lkydrops++;
3011*0Sstevel@tonic-gate 		if (ncpus > 1)
3012*0Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3013*0Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPTNR);
3014*0Sstevel@tonic-gate 	}
3015*0Sstevel@tonic-gate 	kpreempt_enable();
3016*0Sstevel@tonic-gate 
3017*0Sstevel@tonic-gate 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3018*0Sstevel@tonic-gate 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3019*0Sstevel@tonic-gate }
3020*0Sstevel@tonic-gate 
3021*0Sstevel@tonic-gate /*
3022*0Sstevel@tonic-gate  * Called from errorq drain code when processing a CE error, both from
3023*0Sstevel@tonic-gate  * CPU and PCI drain functions.  Decide what further classification actions,
3024*0Sstevel@tonic-gate  * if any, we will perform.  Perform immediate actions now, and schedule
3025*0Sstevel@tonic-gate  * delayed actions as required.  Note that we are no longer necessarily running
3026*0Sstevel@tonic-gate  * on the detecting cpu, and that the async_flt structure will not persist on
3027*0Sstevel@tonic-gate  * return from this function.
3028*0Sstevel@tonic-gate  *
3029*0Sstevel@tonic-gate  * Calls to this function should aim to be self-throtlling in some way.  With
3030*0Sstevel@tonic-gate  * the delayed re-enable of CEEN the absolute rate of calls should not
3031*0Sstevel@tonic-gate  * be excessive.  Callers should also avoid performing in-depth classification
3032*0Sstevel@tonic-gate  * for events in pages that are already known to be suspect.
3033*0Sstevel@tonic-gate  *
3034*0Sstevel@tonic-gate  * We return nonzero to indicate that the event has been copied and
3035*0Sstevel@tonic-gate  * recirculated for further testing.  The caller should not log the event
3036*0Sstevel@tonic-gate  * in this case - it will be logged when further test results are available.
3037*0Sstevel@tonic-gate  *
3038*0Sstevel@tonic-gate  * Our possible contexts are that of errorq_drain: below lock level or from
3039*0Sstevel@tonic-gate  * panic context.  We can assume that the cpu we are running on is online.
3040*0Sstevel@tonic-gate  */
3041*0Sstevel@tonic-gate 
3042*0Sstevel@tonic-gate 
3043*0Sstevel@tonic-gate #ifdef DEBUG
3044*0Sstevel@tonic-gate static int ce_xdiag_forceaction;
3045*0Sstevel@tonic-gate #endif
3046*0Sstevel@tonic-gate 
3047*0Sstevel@tonic-gate int
3048*0Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3049*0Sstevel@tonic-gate     errorq_elem_t *eqep, size_t afltoffset)
3050*0Sstevel@tonic-gate {
3051*0Sstevel@tonic-gate 	ce_dispact_t dispact, action;
3052*0Sstevel@tonic-gate 	cpu_t *cp;
3053*0Sstevel@tonic-gate 	uchar_t dtcrinfo, disp;
3054*0Sstevel@tonic-gate 	int ptnrtype;
3055*0Sstevel@tonic-gate 
3056*0Sstevel@tonic-gate 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3057*0Sstevel@tonic-gate 		ce_xdiag_drops++;
3058*0Sstevel@tonic-gate 		return (0);
3059*0Sstevel@tonic-gate 	} else if (!aflt->flt_in_memory) {
3060*0Sstevel@tonic-gate 		ce_xdiag_drops++;
3061*0Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3062*0Sstevel@tonic-gate 		return (0);
3063*0Sstevel@tonic-gate 	}
3064*0Sstevel@tonic-gate 
3065*0Sstevel@tonic-gate 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3066*0Sstevel@tonic-gate 
3067*0Sstevel@tonic-gate 	/*
3068*0Sstevel@tonic-gate 	 * Some correctable events are not scrubbed/classified, such as those
3069*0Sstevel@tonic-gate 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3070*0Sstevel@tonic-gate 	 * initial detector classification go no further.
3071*0Sstevel@tonic-gate 	 */
3072*0Sstevel@tonic-gate 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3073*0Sstevel@tonic-gate 		ce_xdiag_drops++;
3074*0Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3075*0Sstevel@tonic-gate 		return (0);
3076*0Sstevel@tonic-gate 	}
3077*0Sstevel@tonic-gate 
3078*0Sstevel@tonic-gate 	dispact = CE_DISPACT(ce_disp_table,
3079*0Sstevel@tonic-gate 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3080*0Sstevel@tonic-gate 	    CE_XDIAG_STATE(dtcrinfo),
3081*0Sstevel@tonic-gate 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3082*0Sstevel@tonic-gate 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3083*0Sstevel@tonic-gate 
3084*0Sstevel@tonic-gate 
3085*0Sstevel@tonic-gate 	action = CE_ACT(dispact);	/* bad lookup caught below */
3086*0Sstevel@tonic-gate #ifdef DEBUG
3087*0Sstevel@tonic-gate 	if (ce_xdiag_forceaction != 0)
3088*0Sstevel@tonic-gate 		action = ce_xdiag_forceaction;
3089*0Sstevel@tonic-gate #endif
3090*0Sstevel@tonic-gate 
3091*0Sstevel@tonic-gate 	switch (action) {
3092*0Sstevel@tonic-gate 	case CE_ACT_LKYCHK: {
3093*0Sstevel@tonic-gate 		caddr_t ndata;
3094*0Sstevel@tonic-gate 		errorq_elem_t *neqep;
3095*0Sstevel@tonic-gate 		struct async_flt *ecc;
3096*0Sstevel@tonic-gate 		ce_lkychk_cb_t *cbargp;
3097*0Sstevel@tonic-gate 
3098*0Sstevel@tonic-gate 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3099*0Sstevel@tonic-gate 			ce_xdiag_lkydrops++;
3100*0Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3101*0Sstevel@tonic-gate 			    CE_XDIAG_SKIP_DUPFAIL);
3102*0Sstevel@tonic-gate 			break;
3103*0Sstevel@tonic-gate 		}
3104*0Sstevel@tonic-gate 		ecc = (struct async_flt *)(ndata + afltoffset);
3105*0Sstevel@tonic-gate 
3106*0Sstevel@tonic-gate 		ASSERT(ecc->flt_class == CPU_FAULT ||
3107*0Sstevel@tonic-gate 		    ecc->flt_class == BUS_FAULT);
3108*0Sstevel@tonic-gate 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3109*0Sstevel@tonic-gate 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3110*0Sstevel@tonic-gate 
3111*0Sstevel@tonic-gate 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3112*0Sstevel@tonic-gate 		cbargp->lkycb_aflt = ecc;
3113*0Sstevel@tonic-gate 		cbargp->lkycb_eqp = eqp;
3114*0Sstevel@tonic-gate 		cbargp->lkycb_eqep = neqep;
3115*0Sstevel@tonic-gate 
3116*0Sstevel@tonic-gate 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3117*0Sstevel@tonic-gate 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3118*0Sstevel@tonic-gate 		return (1);
3119*0Sstevel@tonic-gate 	}
3120*0Sstevel@tonic-gate 
3121*0Sstevel@tonic-gate 	case CE_ACT_PTNRCHK:
3122*0Sstevel@tonic-gate 		kpreempt_disable();	/* stop cpu list changing */
3123*0Sstevel@tonic-gate 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3124*0Sstevel@tonic-gate 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3125*0Sstevel@tonic-gate 			    (uint64_t)aflt, (uint64_t)&disp);
3126*0Sstevel@tonic-gate 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3127*0Sstevel@tonic-gate 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3128*0Sstevel@tonic-gate 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3129*0Sstevel@tonic-gate 		} else if (ncpus > 1) {
3130*0Sstevel@tonic-gate 			ce_xdiag_ptnrdrops++;
3131*0Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3132*0Sstevel@tonic-gate 			    CE_XDIAG_SKIP_NOPTNR);
3133*0Sstevel@tonic-gate 		} else {
3134*0Sstevel@tonic-gate 			ce_xdiag_ptnrdrops++;
3135*0Sstevel@tonic-gate 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3136*0Sstevel@tonic-gate 			    CE_XDIAG_SKIP_UNIPROC);
3137*0Sstevel@tonic-gate 		}
3138*0Sstevel@tonic-gate 		kpreempt_enable();
3139*0Sstevel@tonic-gate 		break;
3140*0Sstevel@tonic-gate 
3141*0Sstevel@tonic-gate 	case CE_ACT_DONE:
3142*0Sstevel@tonic-gate 		break;
3143*0Sstevel@tonic-gate 
3144*0Sstevel@tonic-gate 	case CE_ACT(CE_DISP_BAD):
3145*0Sstevel@tonic-gate 	default:
3146*0Sstevel@tonic-gate #ifdef DEBUG
3147*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3148*0Sstevel@tonic-gate #endif
3149*0Sstevel@tonic-gate 		ce_xdiag_bad++;
3150*0Sstevel@tonic-gate 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3151*0Sstevel@tonic-gate 		break;
3152*0Sstevel@tonic-gate 	}
3153*0Sstevel@tonic-gate 
3154*0Sstevel@tonic-gate 	return (0);
3155*0Sstevel@tonic-gate }
3156*0Sstevel@tonic-gate 
3157*0Sstevel@tonic-gate /*
3158*0Sstevel@tonic-gate  * We route all errors through a single switch statement.
3159*0Sstevel@tonic-gate  */
3160*0Sstevel@tonic-gate void
3161*0Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
3162*0Sstevel@tonic-gate {
3163*0Sstevel@tonic-gate 	switch (aflt->flt_class) {
3164*0Sstevel@tonic-gate 	case CPU_FAULT:
3165*0Sstevel@tonic-gate 		cpu_ereport_init(aflt);
3166*0Sstevel@tonic-gate 		if (cpu_async_log_err(aflt, NULL))
3167*0Sstevel@tonic-gate 			cpu_ereport_post(aflt);
3168*0Sstevel@tonic-gate 		break;
3169*0Sstevel@tonic-gate 
3170*0Sstevel@tonic-gate 	case BUS_FAULT:
3171*0Sstevel@tonic-gate 		bus_async_log_err(aflt);
3172*0Sstevel@tonic-gate 		break;
3173*0Sstevel@tonic-gate 
3174*0Sstevel@tonic-gate 	default:
3175*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3176*0Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3177*0Sstevel@tonic-gate 		return;
3178*0Sstevel@tonic-gate 	}
3179*0Sstevel@tonic-gate }
3180*0Sstevel@tonic-gate 
3181*0Sstevel@tonic-gate /*
3182*0Sstevel@tonic-gate  * Routine for panic hook callback from panic_idle().
3183*0Sstevel@tonic-gate  */
3184*0Sstevel@tonic-gate void
3185*0Sstevel@tonic-gate cpu_async_panic_callb(void)
3186*0Sstevel@tonic-gate {
3187*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
3188*0Sstevel@tonic-gate 	struct async_flt *aflt;
3189*0Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
3190*0Sstevel@tonic-gate 	uint64_t afsr_errs;
3191*0Sstevel@tonic-gate 
3192*0Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
3193*0Sstevel@tonic-gate 
3194*0Sstevel@tonic-gate 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3195*0Sstevel@tonic-gate 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3196*0Sstevel@tonic-gate 
3197*0Sstevel@tonic-gate 	if (afsr_errs) {
3198*0Sstevel@tonic-gate 
3199*0Sstevel@tonic-gate 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3200*0Sstevel@tonic-gate 		aflt = (struct async_flt *)&ch_flt;
3201*0Sstevel@tonic-gate 		aflt->flt_id = gethrtime_waitfree();
3202*0Sstevel@tonic-gate 		aflt->flt_bus_id = getprocessorid();
3203*0Sstevel@tonic-gate 		aflt->flt_inst = CPU->cpu_id;
3204*0Sstevel@tonic-gate 		aflt->flt_stat = cpu_error_regs.afsr;
3205*0Sstevel@tonic-gate 		aflt->flt_addr = cpu_error_regs.afar;
3206*0Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_NONE;
3207*0Sstevel@tonic-gate 		aflt->flt_class = CPU_FAULT;
3208*0Sstevel@tonic-gate 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3209*0Sstevel@tonic-gate 		aflt->flt_panic = 1;
3210*0Sstevel@tonic-gate 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3211*0Sstevel@tonic-gate 		ch_flt.afsr_errs = afsr_errs;
3212*0Sstevel@tonic-gate #if defined(SERRANO)
3213*0Sstevel@tonic-gate 		ch_flt.afar2 = cpu_error_regs.afar2;
3214*0Sstevel@tonic-gate #endif	/* SERRANO */
3215*0Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3216*0Sstevel@tonic-gate 	}
3217*0Sstevel@tonic-gate }
3218*0Sstevel@tonic-gate 
3219*0Sstevel@tonic-gate /*
3220*0Sstevel@tonic-gate  * Routine to convert a syndrome into a syndrome code.
3221*0Sstevel@tonic-gate  */
3222*0Sstevel@tonic-gate static int
3223*0Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3224*0Sstevel@tonic-gate {
3225*0Sstevel@tonic-gate 	if (synd_status == AFLT_STAT_INVALID)
3226*0Sstevel@tonic-gate 		return (-1);
3227*0Sstevel@tonic-gate 
3228*0Sstevel@tonic-gate 	/*
3229*0Sstevel@tonic-gate 	 * Use the syndrome to index the appropriate syndrome table,
3230*0Sstevel@tonic-gate 	 * to get the code indicating which bit(s) is(are) bad.
3231*0Sstevel@tonic-gate 	 */
3232*0Sstevel@tonic-gate 	if (afsr_bit &
3233*0Sstevel@tonic-gate 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3234*0Sstevel@tonic-gate 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3235*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
3236*0Sstevel@tonic-gate 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3237*0Sstevel@tonic-gate 				return (-1);
3238*0Sstevel@tonic-gate 			else
3239*0Sstevel@tonic-gate 				return (BPAR0 + synd);
3240*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
3241*0Sstevel@tonic-gate 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3242*0Sstevel@tonic-gate 				return (-1);
3243*0Sstevel@tonic-gate 			else
3244*0Sstevel@tonic-gate 				return (mtag_syndrome_tab[synd]);
3245*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
3246*0Sstevel@tonic-gate 		} else {
3247*0Sstevel@tonic-gate 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3248*0Sstevel@tonic-gate 				return (-1);
3249*0Sstevel@tonic-gate 			else
3250*0Sstevel@tonic-gate 				return (ecc_syndrome_tab[synd]);
3251*0Sstevel@tonic-gate 		}
3252*0Sstevel@tonic-gate 	} else {
3253*0Sstevel@tonic-gate 		return (-1);
3254*0Sstevel@tonic-gate 	}
3255*0Sstevel@tonic-gate }
3256*0Sstevel@tonic-gate 
3257*0Sstevel@tonic-gate /*
3258*0Sstevel@tonic-gate  * Routine to return a string identifying the physical name
3259*0Sstevel@tonic-gate  * associated with a memory/cache error.
3260*0Sstevel@tonic-gate  */
3261*0Sstevel@tonic-gate int
3262*0Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3263*0Sstevel@tonic-gate     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3264*0Sstevel@tonic-gate     ushort_t flt_status, char *buf, int buflen, int *lenp)
3265*0Sstevel@tonic-gate {
3266*0Sstevel@tonic-gate 	int synd_code;
3267*0Sstevel@tonic-gate 	int ret;
3268*0Sstevel@tonic-gate 
3269*0Sstevel@tonic-gate 	/*
3270*0Sstevel@tonic-gate 	 * An AFSR of -1 defaults to a memory syndrome.
3271*0Sstevel@tonic-gate 	 */
3272*0Sstevel@tonic-gate 	if (flt_stat == (uint64_t)-1)
3273*0Sstevel@tonic-gate 		flt_stat = C_AFSR_CE;
3274*0Sstevel@tonic-gate 
3275*0Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3276*0Sstevel@tonic-gate 
3277*0Sstevel@tonic-gate 	/*
3278*0Sstevel@tonic-gate 	 * Syndrome code must be either a single-bit error code
3279*0Sstevel@tonic-gate 	 * (0...143) or -1 for unum lookup.
3280*0Sstevel@tonic-gate 	 */
3281*0Sstevel@tonic-gate 	if (synd_code < 0 || synd_code >= M2)
3282*0Sstevel@tonic-gate 		synd_code = -1;
3283*0Sstevel@tonic-gate 	if (&plat_get_mem_unum) {
3284*0Sstevel@tonic-gate 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3285*0Sstevel@tonic-gate 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3286*0Sstevel@tonic-gate 			buf[0] = '\0';
3287*0Sstevel@tonic-gate 			*lenp = 0;
3288*0Sstevel@tonic-gate 		}
3289*0Sstevel@tonic-gate 
3290*0Sstevel@tonic-gate 		return (ret);
3291*0Sstevel@tonic-gate 	}
3292*0Sstevel@tonic-gate 
3293*0Sstevel@tonic-gate 	return (ENOTSUP);
3294*0Sstevel@tonic-gate }
3295*0Sstevel@tonic-gate 
3296*0Sstevel@tonic-gate /*
3297*0Sstevel@tonic-gate  * Wrapper for cpu_get_mem_unum() routine that takes an
3298*0Sstevel@tonic-gate  * async_flt struct rather than explicit arguments.
3299*0Sstevel@tonic-gate  */
3300*0Sstevel@tonic-gate int
3301*0Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3302*0Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
3303*0Sstevel@tonic-gate {
3304*0Sstevel@tonic-gate 	/*
3305*0Sstevel@tonic-gate 	 * If we come thru here for an IO bus error aflt->flt_stat will
3306*0Sstevel@tonic-gate 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3307*0Sstevel@tonic-gate 	 * so it will interpret this as a memory error.
3308*0Sstevel@tonic-gate 	 */
3309*0Sstevel@tonic-gate 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3310*0Sstevel@tonic-gate 	    (aflt->flt_class == BUS_FAULT) ?
3311*0Sstevel@tonic-gate 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3312*0Sstevel@tonic-gate 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3313*0Sstevel@tonic-gate 	    aflt->flt_status, buf, buflen, lenp));
3314*0Sstevel@tonic-gate }
3315*0Sstevel@tonic-gate 
3316*0Sstevel@tonic-gate /*
3317*0Sstevel@tonic-gate  * This routine is a more generic interface to cpu_get_mem_unum()
3318*0Sstevel@tonic-gate  * that may be used by other modules (e.g. mm).
3319*0Sstevel@tonic-gate  */
3320*0Sstevel@tonic-gate int
3321*0Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3322*0Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
3323*0Sstevel@tonic-gate {
3324*0Sstevel@tonic-gate 	int synd_status, flt_in_memory, ret;
3325*0Sstevel@tonic-gate 	ushort_t flt_status = 0;
3326*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
3327*0Sstevel@tonic-gate 
3328*0Sstevel@tonic-gate 	/*
3329*0Sstevel@tonic-gate 	 * Check for an invalid address.
3330*0Sstevel@tonic-gate 	 */
3331*0Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
3332*0Sstevel@tonic-gate 		return (ENXIO);
3333*0Sstevel@tonic-gate 
3334*0Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
3335*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
3336*0Sstevel@tonic-gate 	else
3337*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
3338*0Sstevel@tonic-gate 
3339*0Sstevel@tonic-gate 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3340*0Sstevel@tonic-gate 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3341*0Sstevel@tonic-gate 
3342*0Sstevel@tonic-gate 	/*
3343*0Sstevel@tonic-gate 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3344*0Sstevel@tonic-gate 	 * For Panther, L2$ is not external, so we don't want to
3345*0Sstevel@tonic-gate 	 * generate an E$ unum for those errors.
3346*0Sstevel@tonic-gate 	 */
3347*0Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3348*0Sstevel@tonic-gate 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3349*0Sstevel@tonic-gate 			flt_status |= ECC_ECACHE;
3350*0Sstevel@tonic-gate 	} else {
3351*0Sstevel@tonic-gate 		if (*afsr & C_AFSR_ECACHE)
3352*0Sstevel@tonic-gate 			flt_status |= ECC_ECACHE;
3353*0Sstevel@tonic-gate 	}
3354*0Sstevel@tonic-gate 
3355*0Sstevel@tonic-gate 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3356*0Sstevel@tonic-gate 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3357*0Sstevel@tonic-gate 	if (ret != 0)
3358*0Sstevel@tonic-gate 		return (ret);
3359*0Sstevel@tonic-gate 
3360*0Sstevel@tonic-gate 	if (*lenp >= buflen)
3361*0Sstevel@tonic-gate 		return (ENAMETOOLONG);
3362*0Sstevel@tonic-gate 
3363*0Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
3364*0Sstevel@tonic-gate 
3365*0Sstevel@tonic-gate 	return (0);
3366*0Sstevel@tonic-gate }
3367*0Sstevel@tonic-gate 
3368*0Sstevel@tonic-gate /*
3369*0Sstevel@tonic-gate  * Routine to return memory information associated
3370*0Sstevel@tonic-gate  * with a physical address and syndrome.
3371*0Sstevel@tonic-gate  */
3372*0Sstevel@tonic-gate int
3373*0Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar,
3374*0Sstevel@tonic-gate     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3375*0Sstevel@tonic-gate     int *segsp, int *banksp, int *mcidp)
3376*0Sstevel@tonic-gate {
3377*0Sstevel@tonic-gate 	int synd_status, synd_code;
3378*0Sstevel@tonic-gate 
3379*0Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
3380*0Sstevel@tonic-gate 		return (ENXIO);
3381*0Sstevel@tonic-gate 
3382*0Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
3383*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
3384*0Sstevel@tonic-gate 	else
3385*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
3386*0Sstevel@tonic-gate 
3387*0Sstevel@tonic-gate 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3388*0Sstevel@tonic-gate 
3389*0Sstevel@tonic-gate 	if (p2get_mem_info != NULL)
3390*0Sstevel@tonic-gate 		return ((p2get_mem_info)(synd_code, afar,
3391*0Sstevel@tonic-gate 			mem_sizep, seg_sizep, bank_sizep,
3392*0Sstevel@tonic-gate 			segsp, banksp, mcidp));
3393*0Sstevel@tonic-gate 	else
3394*0Sstevel@tonic-gate 		return (ENOTSUP);
3395*0Sstevel@tonic-gate }
3396*0Sstevel@tonic-gate 
3397*0Sstevel@tonic-gate /*
3398*0Sstevel@tonic-gate  * Routine to return a string identifying the physical
3399*0Sstevel@tonic-gate  * name associated with a cpuid.
3400*0Sstevel@tonic-gate  */
3401*0Sstevel@tonic-gate int
3402*0Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3403*0Sstevel@tonic-gate {
3404*0Sstevel@tonic-gate 	int ret;
3405*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
3406*0Sstevel@tonic-gate 
3407*0Sstevel@tonic-gate 	if (&plat_get_cpu_unum) {
3408*0Sstevel@tonic-gate 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3409*0Sstevel@tonic-gate 		    != 0)
3410*0Sstevel@tonic-gate 			return (ret);
3411*0Sstevel@tonic-gate 	} else {
3412*0Sstevel@tonic-gate 		return (ENOTSUP);
3413*0Sstevel@tonic-gate 	}
3414*0Sstevel@tonic-gate 
3415*0Sstevel@tonic-gate 	if (*lenp >= buflen)
3416*0Sstevel@tonic-gate 		return (ENAMETOOLONG);
3417*0Sstevel@tonic-gate 
3418*0Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
3419*0Sstevel@tonic-gate 
3420*0Sstevel@tonic-gate 	return (0);
3421*0Sstevel@tonic-gate }
3422*0Sstevel@tonic-gate 
3423*0Sstevel@tonic-gate /*
3424*0Sstevel@tonic-gate  * This routine exports the name buffer size.
3425*0Sstevel@tonic-gate  */
3426*0Sstevel@tonic-gate size_t
3427*0Sstevel@tonic-gate cpu_get_name_bufsize()
3428*0Sstevel@tonic-gate {
3429*0Sstevel@tonic-gate 	return (UNUM_NAMLEN);
3430*0Sstevel@tonic-gate }
3431*0Sstevel@tonic-gate 
/*
 * Historical function, apparently not used.  Kept as an empty stub; all
 * three arguments are ignored.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3439*0Sstevel@tonic-gate 
/*
 * Historical function only called for SBus errors in debugging.  Kept as
 * an empty stub; all three arguments are ignored.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3447*0Sstevel@tonic-gate 
3448*0Sstevel@tonic-gate /*
3449*0Sstevel@tonic-gate  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3450*0Sstevel@tonic-gate  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3451*0Sstevel@tonic-gate  * an async fault structure argument is passed in, the captured error state
3452*0Sstevel@tonic-gate  * (AFSR, AFAR) info will be returned in the structure.
3453*0Sstevel@tonic-gate  */
3454*0Sstevel@tonic-gate int
3455*0Sstevel@tonic-gate clear_errors(ch_async_flt_t *ch_flt)
3456*0Sstevel@tonic-gate {
3457*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3458*0Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
3459*0Sstevel@tonic-gate 
3460*0Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
3461*0Sstevel@tonic-gate 
3462*0Sstevel@tonic-gate 	if (ch_flt != NULL) {
3463*0Sstevel@tonic-gate 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3464*0Sstevel@tonic-gate 		aflt->flt_addr = cpu_error_regs.afar;
3465*0Sstevel@tonic-gate 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3466*0Sstevel@tonic-gate 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3467*0Sstevel@tonic-gate 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3468*0Sstevel@tonic-gate #if defined(SERRANO)
3469*0Sstevel@tonic-gate 		ch_flt->afar2 = cpu_error_regs.afar2;
3470*0Sstevel@tonic-gate #endif	/* SERRANO */
3471*0Sstevel@tonic-gate 	}
3472*0Sstevel@tonic-gate 
3473*0Sstevel@tonic-gate 	set_cpu_error_state(&cpu_error_regs);
3474*0Sstevel@tonic-gate 
3475*0Sstevel@tonic-gate 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3476*0Sstevel@tonic-gate 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3477*0Sstevel@tonic-gate }
3478*0Sstevel@tonic-gate 
3479*0Sstevel@tonic-gate /*
3480*0Sstevel@tonic-gate  * Clear any AFSR error bits, and check for persistence.
3481*0Sstevel@tonic-gate  *
3482*0Sstevel@tonic-gate  * It would be desirable to also insist that syndrome match.  PCI handling
3483*0Sstevel@tonic-gate  * has already filled flt_synd.  For errors trapped by CPU we only fill
3484*0Sstevel@tonic-gate  * flt_synd when we queue the event, so we do not have a valid flt_synd
3485*0Sstevel@tonic-gate  * during initial classification (it is valid if we're called as part of
3486*0Sstevel@tonic-gate  * subsequent low-pil additional classification attempts).  We could try
3487*0Sstevel@tonic-gate  * to determine which syndrome to use: we know we're only called for
3488*0Sstevel@tonic-gate  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3489*0Sstevel@tonic-gate  * would be esynd/none and esynd/msynd, respectively.  If that is
3490*0Sstevel@tonic-gate  * implemented then what do we do in the case that we do experience an
3491*0Sstevel@tonic-gate  * error on the same afar but with different syndrome?  At the very least
3492*0Sstevel@tonic-gate  * we should count such occurences.  Anyway, for now, we'll leave it as
3493*0Sstevel@tonic-gate  * it has been for ages.
3494*0Sstevel@tonic-gate  */
3495*0Sstevel@tonic-gate static int
3496*0Sstevel@tonic-gate clear_ecc(struct async_flt *aflt)
3497*0Sstevel@tonic-gate {
3498*0Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
3499*0Sstevel@tonic-gate 
3500*0Sstevel@tonic-gate 	/*
3501*0Sstevel@tonic-gate 	 * Snapshot the AFSR and AFAR and clear any errors
3502*0Sstevel@tonic-gate 	 */
3503*0Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
3504*0Sstevel@tonic-gate 	set_cpu_error_state(&cpu_error_regs);
3505*0Sstevel@tonic-gate 
3506*0Sstevel@tonic-gate 	/*
3507*0Sstevel@tonic-gate 	 * If any of the same memory access error bits are still on and
3508*0Sstevel@tonic-gate 	 * the AFAR matches, return that the error is persistent.
3509*0Sstevel@tonic-gate 	 */
3510*0Sstevel@tonic-gate 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3511*0Sstevel@tonic-gate 	    cpu_error_regs.afar == aflt->flt_addr);
3512*0Sstevel@tonic-gate }
3513*0Sstevel@tonic-gate 
3514*0Sstevel@tonic-gate /*
3515*0Sstevel@tonic-gate  * Turn off all cpu error detection, normally only used for panics.
3516*0Sstevel@tonic-gate  */
3517*0Sstevel@tonic-gate void
3518*0Sstevel@tonic-gate cpu_disable_errors(void)
3519*0Sstevel@tonic-gate {
3520*0Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3521*0Sstevel@tonic-gate }
3522*0Sstevel@tonic-gate 
3523*0Sstevel@tonic-gate /*
3524*0Sstevel@tonic-gate  * Enable errors.
3525*0Sstevel@tonic-gate  */
3526*0Sstevel@tonic-gate void
3527*0Sstevel@tonic-gate cpu_enable_errors(void)
3528*0Sstevel@tonic-gate {
3529*0Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3530*0Sstevel@tonic-gate }
3531*0Sstevel@tonic-gate 
3532*0Sstevel@tonic-gate /*
3533*0Sstevel@tonic-gate  * Flush the entire ecache using displacement flush by reading through a
3534*0Sstevel@tonic-gate  * physical address range twice as large as the Ecache.
3535*0Sstevel@tonic-gate  */
3536*0Sstevel@tonic-gate void
3537*0Sstevel@tonic-gate cpu_flush_ecache(void)
3538*0Sstevel@tonic-gate {
3539*0Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3540*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
3541*0Sstevel@tonic-gate }
3542*0Sstevel@tonic-gate 
3543*0Sstevel@tonic-gate /*
3544*0Sstevel@tonic-gate  * Return CPU E$ set size - E$ size divided by the associativity.
3545*0Sstevel@tonic-gate  * We use this function in places where the CPU_PRIVATE ptr may not be
3546*0Sstevel@tonic-gate  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3547*0Sstevel@tonic-gate  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3548*0Sstevel@tonic-gate  * up before the kernel switches from OBP's to the kernel's trap table, so
3549*0Sstevel@tonic-gate  * we don't have to worry about cpunodes being unitialized.
3550*0Sstevel@tonic-gate  */
3551*0Sstevel@tonic-gate int
3552*0Sstevel@tonic-gate cpu_ecache_set_size(struct cpu *cp)
3553*0Sstevel@tonic-gate {
3554*0Sstevel@tonic-gate 	if (CPU_PRIVATE(cp))
3555*0Sstevel@tonic-gate 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3556*0Sstevel@tonic-gate 
3557*0Sstevel@tonic-gate 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3558*0Sstevel@tonic-gate }
3559*0Sstevel@tonic-gate 
3560*0Sstevel@tonic-gate /*
3561*0Sstevel@tonic-gate  * Flush Ecache line.
3562*0Sstevel@tonic-gate  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3563*0Sstevel@tonic-gate  * Uses normal displacement flush for Cheetah.
3564*0Sstevel@tonic-gate  */
3565*0Sstevel@tonic-gate static void
3566*0Sstevel@tonic-gate cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3567*0Sstevel@tonic-gate {
3568*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3569*0Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
3570*0Sstevel@tonic-gate 
3571*0Sstevel@tonic-gate 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3572*0Sstevel@tonic-gate }
3573*0Sstevel@tonic-gate 
3574*0Sstevel@tonic-gate /*
3575*0Sstevel@tonic-gate  * Scrub physical address.
3576*0Sstevel@tonic-gate  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3577*0Sstevel@tonic-gate  * Ecache or direct-mapped Ecache.
3578*0Sstevel@tonic-gate  */
3579*0Sstevel@tonic-gate static void
3580*0Sstevel@tonic-gate cpu_scrubphys(struct async_flt *aflt)
3581*0Sstevel@tonic-gate {
3582*0Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
3583*0Sstevel@tonic-gate 
3584*0Sstevel@tonic-gate 	scrubphys(aflt->flt_addr, ec_set_size);
3585*0Sstevel@tonic-gate }
3586*0Sstevel@tonic-gate 
3587*0Sstevel@tonic-gate /*
3588*0Sstevel@tonic-gate  * Clear physical address.
3589*0Sstevel@tonic-gate  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3590*0Sstevel@tonic-gate  * Ecache or direct-mapped Ecache.
3591*0Sstevel@tonic-gate  */
3592*0Sstevel@tonic-gate void
3593*0Sstevel@tonic-gate cpu_clearphys(struct async_flt *aflt)
3594*0Sstevel@tonic-gate {
3595*0Sstevel@tonic-gate 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3596*0Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
3597*0Sstevel@tonic-gate 
3598*0Sstevel@tonic-gate 
3599*0Sstevel@tonic-gate 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3600*0Sstevel@tonic-gate }
3601*0Sstevel@tonic-gate 
3602*0Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
3603*0Sstevel@tonic-gate /*
3604*0Sstevel@tonic-gate  * Check for a matching valid line in all the sets.
3605*0Sstevel@tonic-gate  * If found, return set# + 1. Otherwise return 0.
3606*0Sstevel@tonic-gate  */
3607*0Sstevel@tonic-gate static int
3608*0Sstevel@tonic-gate cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3609*0Sstevel@tonic-gate {
3610*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3611*0Sstevel@tonic-gate 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3612*0Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
3613*0Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3614*0Sstevel@tonic-gate 	int nway = cpu_ecache_nway();
3615*0Sstevel@tonic-gate 	int i;
3616*0Sstevel@tonic-gate 
3617*0Sstevel@tonic-gate 	for (i = 0; i < nway; i++, ecp++) {
3618*0Sstevel@tonic-gate 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3619*0Sstevel@tonic-gate 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3620*0Sstevel@tonic-gate 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3621*0Sstevel@tonic-gate 			return (i+1);
3622*0Sstevel@tonic-gate 	}
3623*0Sstevel@tonic-gate 	return (0);
3624*0Sstevel@tonic-gate }
3625*0Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
3626*0Sstevel@tonic-gate 
3627*0Sstevel@tonic-gate /*
3628*0Sstevel@tonic-gate  * Check whether a line in the given logout info matches the specified
3629*0Sstevel@tonic-gate  * fault address.  If reqval is set then the line must not be Invalid.
3630*0Sstevel@tonic-gate  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3631*0Sstevel@tonic-gate  * set to 2 for l2$ or 3 for l3$.
3632*0Sstevel@tonic-gate  */
3633*0Sstevel@tonic-gate static int
3634*0Sstevel@tonic-gate cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3635*0Sstevel@tonic-gate {
3636*0Sstevel@tonic-gate 	ch_diag_data_t *cdp = data;
3637*0Sstevel@tonic-gate 	ch_ec_data_t *ecp;
3638*0Sstevel@tonic-gate 	int totalsize, ec_set_size;
3639*0Sstevel@tonic-gate 	int i, ways;
3640*0Sstevel@tonic-gate 	int match = 0;
3641*0Sstevel@tonic-gate 	int tagvalid;
3642*0Sstevel@tonic-gate 	uint64_t addr, tagpa;
3643*0Sstevel@tonic-gate 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3644*0Sstevel@tonic-gate 
3645*0Sstevel@tonic-gate 	/*
3646*0Sstevel@tonic-gate 	 * Check the l2$ logout data
3647*0Sstevel@tonic-gate 	 */
3648*0Sstevel@tonic-gate 	if (ispanther) {
3649*0Sstevel@tonic-gate 		ecp = &cdp->chd_l2_data[0];
3650*0Sstevel@tonic-gate 		ec_set_size = PN_L2_SET_SIZE;
3651*0Sstevel@tonic-gate 		ways = PN_L2_NWAYS;
3652*0Sstevel@tonic-gate 	} else {
3653*0Sstevel@tonic-gate 		ecp = &cdp->chd_ec_data[0];
3654*0Sstevel@tonic-gate 		ec_set_size = cpu_ecache_set_size(CPU);
3655*0Sstevel@tonic-gate 		ways = cpu_ecache_nway();
3656*0Sstevel@tonic-gate 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3657*0Sstevel@tonic-gate 	}
3658*0Sstevel@tonic-gate 	/* remove low order PA bits from fault address not used in PA tag */
3659*0Sstevel@tonic-gate 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3660*0Sstevel@tonic-gate 	for (i = 0; i < ways; i++, ecp++) {
3661*0Sstevel@tonic-gate 		if (ispanther) {
3662*0Sstevel@tonic-gate 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3663*0Sstevel@tonic-gate 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3664*0Sstevel@tonic-gate 		} else {
3665*0Sstevel@tonic-gate 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3666*0Sstevel@tonic-gate 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3667*0Sstevel@tonic-gate 			    ecp->ec_tag);
3668*0Sstevel@tonic-gate 		}
3669*0Sstevel@tonic-gate 		if (tagpa == addr && (!reqval || tagvalid)) {
3670*0Sstevel@tonic-gate 			match = i + 1;
3671*0Sstevel@tonic-gate 			*level = 2;
3672*0Sstevel@tonic-gate 			break;
3673*0Sstevel@tonic-gate 		}
3674*0Sstevel@tonic-gate 	}
3675*0Sstevel@tonic-gate 
3676*0Sstevel@tonic-gate 	if (match || !ispanther)
3677*0Sstevel@tonic-gate 		return (match);
3678*0Sstevel@tonic-gate 
3679*0Sstevel@tonic-gate 	/* For Panther we also check the l3$ */
3680*0Sstevel@tonic-gate 	ecp = &cdp->chd_ec_data[0];
3681*0Sstevel@tonic-gate 	ec_set_size = PN_L3_SET_SIZE;
3682*0Sstevel@tonic-gate 	ways = PN_L3_NWAYS;
3683*0Sstevel@tonic-gate 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3684*0Sstevel@tonic-gate 
3685*0Sstevel@tonic-gate 	for (i = 0; i < ways; i++, ecp++) {
3686*0Sstevel@tonic-gate 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3687*0Sstevel@tonic-gate 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3688*0Sstevel@tonic-gate 			match = i + 1;
3689*0Sstevel@tonic-gate 			*level = 3;
3690*0Sstevel@tonic-gate 			break;
3691*0Sstevel@tonic-gate 		}
3692*0Sstevel@tonic-gate 	}
3693*0Sstevel@tonic-gate 
3694*0Sstevel@tonic-gate 	return (match);
3695*0Sstevel@tonic-gate }
3696*0Sstevel@tonic-gate 
3697*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
3698*0Sstevel@tonic-gate /*
3699*0Sstevel@tonic-gate  * Record information related to the source of an Dcache Parity Error.
3700*0Sstevel@tonic-gate  */
3701*0Sstevel@tonic-gate static void
3702*0Sstevel@tonic-gate cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3703*0Sstevel@tonic-gate {
3704*0Sstevel@tonic-gate 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3705*0Sstevel@tonic-gate 	int index;
3706*0Sstevel@tonic-gate 
3707*0Sstevel@tonic-gate 	/*
3708*0Sstevel@tonic-gate 	 * Since instruction decode cannot be done at high PIL
3709*0Sstevel@tonic-gate 	 * just examine the entire Dcache to locate the error.
3710*0Sstevel@tonic-gate 	 */
3711*0Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3712*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = -1;
3713*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = -1;
3714*0Sstevel@tonic-gate 	}
3715*0Sstevel@tonic-gate 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3716*0Sstevel@tonic-gate 		cpu_dcache_parity_check(ch_flt, index);
3717*0Sstevel@tonic-gate }
3718*0Sstevel@tonic-gate 
3719*0Sstevel@tonic-gate /*
3720*0Sstevel@tonic-gate  * Check all ways of the Dcache at a specified index for good parity.
3721*0Sstevel@tonic-gate  */
3722*0Sstevel@tonic-gate static void
3723*0Sstevel@tonic-gate cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3724*0Sstevel@tonic-gate {
3725*0Sstevel@tonic-gate 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3726*0Sstevel@tonic-gate 	uint64_t parity_bits, pbits, data_word;
3727*0Sstevel@tonic-gate 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3728*0Sstevel@tonic-gate 	int way, word, data_byte;
3729*0Sstevel@tonic-gate 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3730*0Sstevel@tonic-gate 	ch_dc_data_t tmp_dcp;
3731*0Sstevel@tonic-gate 
3732*0Sstevel@tonic-gate 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3733*0Sstevel@tonic-gate 		/*
3734*0Sstevel@tonic-gate 		 * Perform diagnostic read.
3735*0Sstevel@tonic-gate 		 */
3736*0Sstevel@tonic-gate 		get_dcache_dtag(index + way * dc_set_size,
3737*0Sstevel@tonic-gate 				(uint64_t *)&tmp_dcp);
3738*0Sstevel@tonic-gate 
3739*0Sstevel@tonic-gate 		/*
3740*0Sstevel@tonic-gate 		 * Check tag for even parity.
3741*0Sstevel@tonic-gate 		 * Sum of 1 bits (including parity bit) should be even.
3742*0Sstevel@tonic-gate 		 */
3743*0Sstevel@tonic-gate 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3744*0Sstevel@tonic-gate 			/*
3745*0Sstevel@tonic-gate 			 * If this is the first error log detailed information
3746*0Sstevel@tonic-gate 			 * about it and check the snoop tag. Otherwise just
3747*0Sstevel@tonic-gate 			 * record the fact that we found another error.
3748*0Sstevel@tonic-gate 			 */
3749*0Sstevel@tonic-gate 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3750*0Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_way = way;
3751*0Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_cache =
3752*0Sstevel@tonic-gate 				    CPU_DC_PARITY;
3753*0Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3754*0Sstevel@tonic-gate 
3755*0Sstevel@tonic-gate 				if (popc64(tmp_dcp.dc_sntag &
3756*0Sstevel@tonic-gate 						CHP_DCSNTAG_PARMASK) & 1) {
3757*0Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_tag |=
3758*0Sstevel@tonic-gate 								CHP_DC_SNTAG;
3759*0Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_lcnt++;
3760*0Sstevel@tonic-gate 				}
3761*0Sstevel@tonic-gate 
3762*0Sstevel@tonic-gate 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3763*0Sstevel@tonic-gate 			}
3764*0Sstevel@tonic-gate 
3765*0Sstevel@tonic-gate 			ch_flt->parity_data.dpe.cpl_lcnt++;
3766*0Sstevel@tonic-gate 		}
3767*0Sstevel@tonic-gate 
3768*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3769*0Sstevel@tonic-gate 			/*
3770*0Sstevel@tonic-gate 			 * Panther has more parity bits than the other
3771*0Sstevel@tonic-gate 			 * processors for covering dcache data and so each
3772*0Sstevel@tonic-gate 			 * byte of data in each word has its own parity bit.
3773*0Sstevel@tonic-gate 			 */
3774*0Sstevel@tonic-gate 			parity_bits = tmp_dcp.dc_pn_data_parity;
3775*0Sstevel@tonic-gate 			for (word = 0; word < 4; word++) {
3776*0Sstevel@tonic-gate 				data_word = tmp_dcp.dc_data[word];
3777*0Sstevel@tonic-gate 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3778*0Sstevel@tonic-gate 				for (data_byte = 0; data_byte < 8;
3779*0Sstevel@tonic-gate 				    data_byte++) {
3780*0Sstevel@tonic-gate 					if (((popc64(data_word &
3781*0Sstevel@tonic-gate 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3782*0Sstevel@tonic-gate 					    (pbits & 1)) {
3783*0Sstevel@tonic-gate 						cpu_record_dc_data_parity(
3784*0Sstevel@tonic-gate 						ch_flt, dcp, &tmp_dcp, way,
3785*0Sstevel@tonic-gate 						word);
3786*0Sstevel@tonic-gate 					}
3787*0Sstevel@tonic-gate 					pbits >>= 1;
3788*0Sstevel@tonic-gate 					data_word >>= 8;
3789*0Sstevel@tonic-gate 				}
3790*0Sstevel@tonic-gate 				parity_bits >>= 8;
3791*0Sstevel@tonic-gate 			}
3792*0Sstevel@tonic-gate 		} else {
3793*0Sstevel@tonic-gate 			/*
3794*0Sstevel@tonic-gate 			 * Check data array for even parity.
3795*0Sstevel@tonic-gate 			 * The 8 parity bits are grouped into 4 pairs each
3796*0Sstevel@tonic-gate 			 * of which covers a 64-bit word.  The endianness is
3797*0Sstevel@tonic-gate 			 * reversed -- the low-order parity bits cover the
3798*0Sstevel@tonic-gate 			 * high-order data words.
3799*0Sstevel@tonic-gate 			 */
3800*0Sstevel@tonic-gate 			parity_bits = tmp_dcp.dc_utag >> 8;
3801*0Sstevel@tonic-gate 			for (word = 0; word < 4; word++) {
3802*0Sstevel@tonic-gate 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3803*0Sstevel@tonic-gate 				if ((popc64(tmp_dcp.dc_data[word]) +
3804*0Sstevel@tonic-gate 				    parity_bits_popc[pbits]) & 1) {
3805*0Sstevel@tonic-gate 					cpu_record_dc_data_parity(ch_flt, dcp,
3806*0Sstevel@tonic-gate 					    &tmp_dcp, way, word);
3807*0Sstevel@tonic-gate 				}
3808*0Sstevel@tonic-gate 			}
3809*0Sstevel@tonic-gate 		}
3810*0Sstevel@tonic-gate 	}
3811*0Sstevel@tonic-gate }
3812*0Sstevel@tonic-gate 
3813*0Sstevel@tonic-gate static void
3814*0Sstevel@tonic-gate cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3815*0Sstevel@tonic-gate     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3816*0Sstevel@tonic-gate {
3817*0Sstevel@tonic-gate 	/*
3818*0Sstevel@tonic-gate 	 * If this is the first error log detailed information about it.
3819*0Sstevel@tonic-gate 	 * Otherwise just record the fact that we found another error.
3820*0Sstevel@tonic-gate 	 */
3821*0Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3822*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = way;
3823*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3824*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3825*0Sstevel@tonic-gate 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3826*0Sstevel@tonic-gate 	}
3827*0Sstevel@tonic-gate 	ch_flt->parity_data.dpe.cpl_lcnt++;
3828*0Sstevel@tonic-gate }
3829*0Sstevel@tonic-gate 
3830*0Sstevel@tonic-gate /*
3831*0Sstevel@tonic-gate  * Record information related to the source of an Icache Parity Error.
3832*0Sstevel@tonic-gate  *
3833*0Sstevel@tonic-gate  * Called with the Icache disabled so any diagnostic accesses are safe.
3834*0Sstevel@tonic-gate  */
3835*0Sstevel@tonic-gate static void
3836*0Sstevel@tonic-gate cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3837*0Sstevel@tonic-gate {
3838*0Sstevel@tonic-gate 	int	ic_set_size;
3839*0Sstevel@tonic-gate 	int	ic_linesize;
3840*0Sstevel@tonic-gate 	int	index;
3841*0Sstevel@tonic-gate 
3842*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU)) {
3843*0Sstevel@tonic-gate 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3844*0Sstevel@tonic-gate 		    CH_ICACHE_NWAY;
3845*0Sstevel@tonic-gate 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3846*0Sstevel@tonic-gate 	} else {
3847*0Sstevel@tonic-gate 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3848*0Sstevel@tonic-gate 		ic_linesize = icache_linesize;
3849*0Sstevel@tonic-gate 	}
3850*0Sstevel@tonic-gate 
3851*0Sstevel@tonic-gate 	ch_flt->parity_data.ipe.cpl_way = -1;
3852*0Sstevel@tonic-gate 	ch_flt->parity_data.ipe.cpl_off = -1;
3853*0Sstevel@tonic-gate 
3854*0Sstevel@tonic-gate 	for (index = 0; index < ic_set_size; index += ic_linesize)
3855*0Sstevel@tonic-gate 		cpu_icache_parity_check(ch_flt, index);
3856*0Sstevel@tonic-gate }
3857*0Sstevel@tonic-gate 
3858*0Sstevel@tonic-gate /*
3859*0Sstevel@tonic-gate  * Check all ways of the Icache at a specified index for good parity.
3860*0Sstevel@tonic-gate  */
3861*0Sstevel@tonic-gate static void
3862*0Sstevel@tonic-gate cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3863*0Sstevel@tonic-gate {
3864*0Sstevel@tonic-gate 	uint64_t parmask, pn_inst_parity;
3865*0Sstevel@tonic-gate 	int ic_set_size;
3866*0Sstevel@tonic-gate 	int ic_linesize;
3867*0Sstevel@tonic-gate 	int flt_index, way, instr, num_instr;
3868*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3869*0Sstevel@tonic-gate 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3870*0Sstevel@tonic-gate 	ch_ic_data_t tmp_icp;
3871*0Sstevel@tonic-gate 
3872*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU)) {
3873*0Sstevel@tonic-gate 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3874*0Sstevel@tonic-gate 		    CH_ICACHE_NWAY;
3875*0Sstevel@tonic-gate 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3876*0Sstevel@tonic-gate 	} else {
3877*0Sstevel@tonic-gate 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3878*0Sstevel@tonic-gate 		ic_linesize = icache_linesize;
3879*0Sstevel@tonic-gate 	}
3880*0Sstevel@tonic-gate 
3881*0Sstevel@tonic-gate 	/*
3882*0Sstevel@tonic-gate 	 * Panther has twice as many instructions per icache line and the
3883*0Sstevel@tonic-gate 	 * instruction parity bit is in a different location.
3884*0Sstevel@tonic-gate 	 */
3885*0Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3886*0Sstevel@tonic-gate 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3887*0Sstevel@tonic-gate 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3888*0Sstevel@tonic-gate 	} else {
3889*0Sstevel@tonic-gate 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3890*0Sstevel@tonic-gate 		pn_inst_parity = 0;
3891*0Sstevel@tonic-gate 	}
3892*0Sstevel@tonic-gate 
3893*0Sstevel@tonic-gate 	/*
3894*0Sstevel@tonic-gate 	 * Index at which we expect to find the parity error.
3895*0Sstevel@tonic-gate 	 */
3896*0Sstevel@tonic-gate 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3897*0Sstevel@tonic-gate 
3898*0Sstevel@tonic-gate 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3899*0Sstevel@tonic-gate 		/*
3900*0Sstevel@tonic-gate 		 * Diagnostic reads expect address argument in ASI format.
3901*0Sstevel@tonic-gate 		 */
3902*0Sstevel@tonic-gate 		get_icache_dtag(2 * (index + way * ic_set_size),
3903*0Sstevel@tonic-gate 				(uint64_t *)&tmp_icp);
3904*0Sstevel@tonic-gate 
3905*0Sstevel@tonic-gate 		/*
3906*0Sstevel@tonic-gate 		 * If this is the index in which we expect to find the
3907*0Sstevel@tonic-gate 		 * error log detailed information about each of the ways.
3908*0Sstevel@tonic-gate 		 * This information will be displayed later if we can't
3909*0Sstevel@tonic-gate 		 * determine the exact way in which the error is located.
3910*0Sstevel@tonic-gate 		 */
3911*0Sstevel@tonic-gate 		if (flt_index == index)
3912*0Sstevel@tonic-gate 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3913*0Sstevel@tonic-gate 
3914*0Sstevel@tonic-gate 		/*
3915*0Sstevel@tonic-gate 		 * Check tag for even parity.
3916*0Sstevel@tonic-gate 		 * Sum of 1 bits (including parity bit) should be even.
3917*0Sstevel@tonic-gate 		 */
3918*0Sstevel@tonic-gate 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3919*0Sstevel@tonic-gate 			/*
3920*0Sstevel@tonic-gate 			 * If this way is the one in which we expected
3921*0Sstevel@tonic-gate 			 * to find the error record the way and check the
3922*0Sstevel@tonic-gate 			 * snoop tag. Otherwise just record the fact we
3923*0Sstevel@tonic-gate 			 * found another error.
3924*0Sstevel@tonic-gate 			 */
3925*0Sstevel@tonic-gate 			if (flt_index == index) {
3926*0Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_way = way;
3927*0Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3928*0Sstevel@tonic-gate 
3929*0Sstevel@tonic-gate 				if (popc64(tmp_icp.ic_sntag &
3930*0Sstevel@tonic-gate 						CHP_ICSNTAG_PARMASK) & 1) {
3931*0Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_tag |=
3932*0Sstevel@tonic-gate 								CHP_IC_SNTAG;
3933*0Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_lcnt++;
3934*0Sstevel@tonic-gate 				}
3935*0Sstevel@tonic-gate 
3936*0Sstevel@tonic-gate 			}
3937*0Sstevel@tonic-gate 			ch_flt->parity_data.ipe.cpl_lcnt++;
3938*0Sstevel@tonic-gate 			continue;
3939*0Sstevel@tonic-gate 		}
3940*0Sstevel@tonic-gate 
3941*0Sstevel@tonic-gate 		/*
3942*0Sstevel@tonic-gate 		 * Check instruction data for even parity.
3943*0Sstevel@tonic-gate 		 * Bits participating in parity differ for PC-relative
3944*0Sstevel@tonic-gate 		 * versus non-PC-relative instructions.
3945*0Sstevel@tonic-gate 		 */
3946*0Sstevel@tonic-gate 		for (instr = 0; instr < num_instr; instr++) {
3947*0Sstevel@tonic-gate 			parmask = (tmp_icp.ic_data[instr] &
3948*0Sstevel@tonic-gate 					CH_ICDATA_PRED_ISPCREL) ?
3949*0Sstevel@tonic-gate 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3950*0Sstevel@tonic-gate 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3951*0Sstevel@tonic-gate 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3952*0Sstevel@tonic-gate 				/*
3953*0Sstevel@tonic-gate 				 * If this way is the one in which we expected
3954*0Sstevel@tonic-gate 				 * to find the error record the way and offset.
3955*0Sstevel@tonic-gate 				 * Otherwise just log the fact we found another
3956*0Sstevel@tonic-gate 				 * error.
3957*0Sstevel@tonic-gate 				 */
3958*0Sstevel@tonic-gate 				if (flt_index == index) {
3959*0Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_way = way;
3960*0Sstevel@tonic-gate 					ch_flt->parity_data.ipe.cpl_off =
3961*0Sstevel@tonic-gate 								instr * 4;
3962*0Sstevel@tonic-gate 				}
3963*0Sstevel@tonic-gate 				ch_flt->parity_data.ipe.cpl_lcnt++;
3964*0Sstevel@tonic-gate 				continue;
3965*0Sstevel@tonic-gate 			}
3966*0Sstevel@tonic-gate 		}
3967*0Sstevel@tonic-gate 	}
3968*0Sstevel@tonic-gate }
3969*0Sstevel@tonic-gate 
3970*0Sstevel@tonic-gate /*
3971*0Sstevel@tonic-gate  * Record information related to the source of an Pcache Parity Error.
3972*0Sstevel@tonic-gate  */
3973*0Sstevel@tonic-gate static void
3974*0Sstevel@tonic-gate cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3975*0Sstevel@tonic-gate {
3976*0Sstevel@tonic-gate 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3977*0Sstevel@tonic-gate 	int index;
3978*0Sstevel@tonic-gate 
3979*0Sstevel@tonic-gate 	/*
3980*0Sstevel@tonic-gate 	 * Since instruction decode cannot be done at high PIL just
3981*0Sstevel@tonic-gate 	 * examine the entire Pcache to check for any parity errors.
3982*0Sstevel@tonic-gate 	 */
3983*0Sstevel@tonic-gate 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3984*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_way = -1;
3985*0Sstevel@tonic-gate 		ch_flt->parity_data.dpe.cpl_off = -1;
3986*0Sstevel@tonic-gate 	}
3987*0Sstevel@tonic-gate 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3988*0Sstevel@tonic-gate 		cpu_pcache_parity_check(ch_flt, index);
3989*0Sstevel@tonic-gate }
3990*0Sstevel@tonic-gate 
3991*0Sstevel@tonic-gate /*
3992*0Sstevel@tonic-gate  * Check all ways of the Pcache at a specified index for good parity.
3993*0Sstevel@tonic-gate  */
3994*0Sstevel@tonic-gate static void
3995*0Sstevel@tonic-gate cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
3996*0Sstevel@tonic-gate {
3997*0Sstevel@tonic-gate 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3998*0Sstevel@tonic-gate 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
3999*0Sstevel@tonic-gate 	int way, word, pbit, parity_bits;
4000*0Sstevel@tonic-gate 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4001*0Sstevel@tonic-gate 	ch_pc_data_t tmp_pcp;
4002*0Sstevel@tonic-gate 
4003*0Sstevel@tonic-gate 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4004*0Sstevel@tonic-gate 		/*
4005*0Sstevel@tonic-gate 		 * Perform diagnostic read.
4006*0Sstevel@tonic-gate 		 */
4007*0Sstevel@tonic-gate 		get_pcache_dtag(index + way * pc_set_size,
4008*0Sstevel@tonic-gate 				(uint64_t *)&tmp_pcp);
4009*0Sstevel@tonic-gate 		/*
4010*0Sstevel@tonic-gate 		 * Check data array for odd parity. There are 8 parity
4011*0Sstevel@tonic-gate 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4012*0Sstevel@tonic-gate 		 * of those bits covers exactly 8 bytes of the data
4013*0Sstevel@tonic-gate 		 * array:
4014*0Sstevel@tonic-gate 		 *
4015*0Sstevel@tonic-gate 		 *	parity bit	P$ data bytes covered
4016*0Sstevel@tonic-gate 		 *	----------	---------------------
4017*0Sstevel@tonic-gate 		 *	50		63:56
4018*0Sstevel@tonic-gate 		 *	51		55:48
4019*0Sstevel@tonic-gate 		 *	52		47:40
4020*0Sstevel@tonic-gate 		 *	53		39:32
4021*0Sstevel@tonic-gate 		 *	54		31:24
4022*0Sstevel@tonic-gate 		 *	55		23:16
4023*0Sstevel@tonic-gate 		 *	56		15:8
4024*0Sstevel@tonic-gate 		 *	57		7:0
4025*0Sstevel@tonic-gate 		 */
4026*0Sstevel@tonic-gate 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4027*0Sstevel@tonic-gate 		for (word = 0; word < pc_data_words; word++) {
4028*0Sstevel@tonic-gate 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4029*0Sstevel@tonic-gate 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4030*0Sstevel@tonic-gate 				/*
4031*0Sstevel@tonic-gate 				 * If this is the first error log detailed
4032*0Sstevel@tonic-gate 				 * information about it. Otherwise just record
4033*0Sstevel@tonic-gate 				 * the fact that we found another error.
4034*0Sstevel@tonic-gate 				 */
4035*0Sstevel@tonic-gate 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4036*0Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_way = way;
4037*0Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_cache =
4038*0Sstevel@tonic-gate 					    CPU_PC_PARITY;
4039*0Sstevel@tonic-gate 					ch_flt->parity_data.dpe.cpl_off =
4040*0Sstevel@tonic-gate 					    word * sizeof (uint64_t);
4041*0Sstevel@tonic-gate 					bcopy(&tmp_pcp, pcp,
4042*0Sstevel@tonic-gate 							sizeof (ch_pc_data_t));
4043*0Sstevel@tonic-gate 				}
4044*0Sstevel@tonic-gate 				ch_flt->parity_data.dpe.cpl_lcnt++;
4045*0Sstevel@tonic-gate 			}
4046*0Sstevel@tonic-gate 		}
4047*0Sstevel@tonic-gate 	}
4048*0Sstevel@tonic-gate }
4049*0Sstevel@tonic-gate 
4050*0Sstevel@tonic-gate 
4051*0Sstevel@tonic-gate /*
4052*0Sstevel@tonic-gate  * Add L1 Data cache data to the ereport payload.
4053*0Sstevel@tonic-gate  */
4054*0Sstevel@tonic-gate static void
4055*0Sstevel@tonic-gate cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4056*0Sstevel@tonic-gate {
4057*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4058*0Sstevel@tonic-gate 	ch_dc_data_t *dcp;
4059*0Sstevel@tonic-gate 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4060*0Sstevel@tonic-gate 	uint_t nelem;
4061*0Sstevel@tonic-gate 	int i, ways_to_check, ways_logged = 0;
4062*0Sstevel@tonic-gate 
4063*0Sstevel@tonic-gate 	/*
4064*0Sstevel@tonic-gate 	 * If this is an D$ fault then there may be multiple
4065*0Sstevel@tonic-gate 	 * ways captured in the ch_parity_log_t structure.
4066*0Sstevel@tonic-gate 	 * Otherwise, there will be at most one way captured
4067*0Sstevel@tonic-gate 	 * in the ch_diag_data_t struct.
4068*0Sstevel@tonic-gate 	 * Check each way to see if it should be encoded.
4069*0Sstevel@tonic-gate 	 */
4070*0Sstevel@tonic-gate 	if (ch_flt->flt_type == CPU_DC_PARITY)
4071*0Sstevel@tonic-gate 		ways_to_check = CH_DCACHE_NWAY;
4072*0Sstevel@tonic-gate 	else
4073*0Sstevel@tonic-gate 		ways_to_check = 1;
4074*0Sstevel@tonic-gate 	for (i = 0; i < ways_to_check; i++) {
4075*0Sstevel@tonic-gate 		if (ch_flt->flt_type == CPU_DC_PARITY)
4076*0Sstevel@tonic-gate 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4077*0Sstevel@tonic-gate 		else
4078*0Sstevel@tonic-gate 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4079*0Sstevel@tonic-gate 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4080*0Sstevel@tonic-gate 			bcopy(dcp, &dcdata[ways_logged],
4081*0Sstevel@tonic-gate 				sizeof (ch_dc_data_t));
4082*0Sstevel@tonic-gate 			ways_logged++;
4083*0Sstevel@tonic-gate 		}
4084*0Sstevel@tonic-gate 	}
4085*0Sstevel@tonic-gate 
4086*0Sstevel@tonic-gate 	/*
4087*0Sstevel@tonic-gate 	 * Add the dcache data to the payload.
4088*0Sstevel@tonic-gate 	 */
4089*0Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4090*0Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4091*0Sstevel@tonic-gate 	if (ways_logged != 0) {
4092*0Sstevel@tonic-gate 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4093*0Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4094*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4095*0Sstevel@tonic-gate 	}
4096*0Sstevel@tonic-gate }
4097*0Sstevel@tonic-gate 
4098*0Sstevel@tonic-gate /*
4099*0Sstevel@tonic-gate  * Add L1 Instruction cache data to the ereport payload.
4100*0Sstevel@tonic-gate  */
4101*0Sstevel@tonic-gate static void
4102*0Sstevel@tonic-gate cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4103*0Sstevel@tonic-gate {
4104*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4105*0Sstevel@tonic-gate 	ch_ic_data_t *icp;
4106*0Sstevel@tonic-gate 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4107*0Sstevel@tonic-gate 	uint_t nelem;
4108*0Sstevel@tonic-gate 	int i, ways_to_check, ways_logged = 0;
4109*0Sstevel@tonic-gate 
4110*0Sstevel@tonic-gate 	/*
4111*0Sstevel@tonic-gate 	 * If this is an I$ fault then there may be multiple
4112*0Sstevel@tonic-gate 	 * ways captured in the ch_parity_log_t structure.
4113*0Sstevel@tonic-gate 	 * Otherwise, there will be at most one way captured
4114*0Sstevel@tonic-gate 	 * in the ch_diag_data_t struct.
4115*0Sstevel@tonic-gate 	 * Check each way to see if it should be encoded.
4116*0Sstevel@tonic-gate 	 */
4117*0Sstevel@tonic-gate 	if (ch_flt->flt_type == CPU_IC_PARITY)
4118*0Sstevel@tonic-gate 		ways_to_check = CH_ICACHE_NWAY;
4119*0Sstevel@tonic-gate 	else
4120*0Sstevel@tonic-gate 		ways_to_check = 1;
4121*0Sstevel@tonic-gate 	for (i = 0; i < ways_to_check; i++) {
4122*0Sstevel@tonic-gate 		if (ch_flt->flt_type == CPU_IC_PARITY)
4123*0Sstevel@tonic-gate 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4124*0Sstevel@tonic-gate 		else
4125*0Sstevel@tonic-gate 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4126*0Sstevel@tonic-gate 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4127*0Sstevel@tonic-gate 			bcopy(icp, &icdata[ways_logged],
4128*0Sstevel@tonic-gate 				sizeof (ch_ic_data_t));
4129*0Sstevel@tonic-gate 			ways_logged++;
4130*0Sstevel@tonic-gate 		}
4131*0Sstevel@tonic-gate 	}
4132*0Sstevel@tonic-gate 
4133*0Sstevel@tonic-gate 	/*
4134*0Sstevel@tonic-gate 	 * Add the icache data to the payload.
4135*0Sstevel@tonic-gate 	 */
4136*0Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4137*0Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4138*0Sstevel@tonic-gate 	if (ways_logged != 0) {
4139*0Sstevel@tonic-gate 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4140*0Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4141*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4142*0Sstevel@tonic-gate 	}
4143*0Sstevel@tonic-gate }
4144*0Sstevel@tonic-gate 
4145*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
4146*0Sstevel@tonic-gate 
4147*0Sstevel@tonic-gate /*
4148*0Sstevel@tonic-gate  * Add ecache data to payload.
4149*0Sstevel@tonic-gate  */
4150*0Sstevel@tonic-gate static void
4151*0Sstevel@tonic-gate cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4152*0Sstevel@tonic-gate {
4153*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4154*0Sstevel@tonic-gate 	ch_ec_data_t *ecp;
4155*0Sstevel@tonic-gate 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4156*0Sstevel@tonic-gate 	uint_t nelem;
4157*0Sstevel@tonic-gate 	int i, ways_logged = 0;
4158*0Sstevel@tonic-gate 
4159*0Sstevel@tonic-gate 	/*
4160*0Sstevel@tonic-gate 	 * Check each way to see if it should be encoded
4161*0Sstevel@tonic-gate 	 * and concatinate it into a temporary buffer.
4162*0Sstevel@tonic-gate 	 */
4163*0Sstevel@tonic-gate 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4164*0Sstevel@tonic-gate 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4165*0Sstevel@tonic-gate 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4166*0Sstevel@tonic-gate 			bcopy(ecp, &ecdata[ways_logged],
4167*0Sstevel@tonic-gate 				sizeof (ch_ec_data_t));
4168*0Sstevel@tonic-gate 			ways_logged++;
4169*0Sstevel@tonic-gate 		}
4170*0Sstevel@tonic-gate 	}
4171*0Sstevel@tonic-gate 
4172*0Sstevel@tonic-gate 	/*
4173*0Sstevel@tonic-gate 	 * Panther CPUs have an additional level of cache and so
4174*0Sstevel@tonic-gate 	 * what we just collected was the L3 (ecache) and not the
4175*0Sstevel@tonic-gate 	 * L2 cache.
4176*0Sstevel@tonic-gate 	 */
4177*0Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4178*0Sstevel@tonic-gate 		/*
4179*0Sstevel@tonic-gate 		 * Add the L3 (ecache) data to the payload.
4180*0Sstevel@tonic-gate 		 */
4181*0Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4182*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4183*0Sstevel@tonic-gate 		if (ways_logged != 0) {
4184*0Sstevel@tonic-gate 			nelem = sizeof (ch_ec_data_t) /
4185*0Sstevel@tonic-gate 			    sizeof (uint64_t) * ways_logged;
4186*0Sstevel@tonic-gate 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4187*0Sstevel@tonic-gate 			    DATA_TYPE_UINT64_ARRAY, nelem,
4188*0Sstevel@tonic-gate 			    (uint64_t *)ecdata, NULL);
4189*0Sstevel@tonic-gate 		}
4190*0Sstevel@tonic-gate 
4191*0Sstevel@tonic-gate 		/*
4192*0Sstevel@tonic-gate 		 * Now collect the L2 cache.
4193*0Sstevel@tonic-gate 		 */
4194*0Sstevel@tonic-gate 		ways_logged = 0;
4195*0Sstevel@tonic-gate 		for (i = 0; i < PN_L2_NWAYS; i++) {
4196*0Sstevel@tonic-gate 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4197*0Sstevel@tonic-gate 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4198*0Sstevel@tonic-gate 				bcopy(ecp, &ecdata[ways_logged],
4199*0Sstevel@tonic-gate 				    sizeof (ch_ec_data_t));
4200*0Sstevel@tonic-gate 				ways_logged++;
4201*0Sstevel@tonic-gate 			}
4202*0Sstevel@tonic-gate 		}
4203*0Sstevel@tonic-gate 	}
4204*0Sstevel@tonic-gate 
4205*0Sstevel@tonic-gate 	/*
4206*0Sstevel@tonic-gate 	 * Add the L2 cache data to the payload.
4207*0Sstevel@tonic-gate 	 */
4208*0Sstevel@tonic-gate 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4209*0Sstevel@tonic-gate 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4210*0Sstevel@tonic-gate 	if (ways_logged != 0) {
4211*0Sstevel@tonic-gate 		nelem = sizeof (ch_ec_data_t) /
4212*0Sstevel@tonic-gate 			sizeof (uint64_t) * ways_logged;
4213*0Sstevel@tonic-gate 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4214*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4215*0Sstevel@tonic-gate 	}
4216*0Sstevel@tonic-gate }
4217*0Sstevel@tonic-gate 
4218*0Sstevel@tonic-gate /*
4219*0Sstevel@tonic-gate  * Encode the data saved in the ch_async_flt_t struct into
4220*0Sstevel@tonic-gate  * the FM ereport payload.
4221*0Sstevel@tonic-gate  */
4222*0Sstevel@tonic-gate static void
4223*0Sstevel@tonic-gate cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4224*0Sstevel@tonic-gate 	nvlist_t *resource, int *afar_status, int *synd_status)
4225*0Sstevel@tonic-gate {
4226*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4227*0Sstevel@tonic-gate 	*synd_status = AFLT_STAT_INVALID;
4228*0Sstevel@tonic-gate 	*afar_status = AFLT_STAT_INVALID;
4229*0Sstevel@tonic-gate 
4230*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4231*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4232*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4233*0Sstevel@tonic-gate 	}
4234*0Sstevel@tonic-gate 
4235*0Sstevel@tonic-gate 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4236*0Sstevel@tonic-gate 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4237*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4238*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4239*0Sstevel@tonic-gate 	}
4240*0Sstevel@tonic-gate 
4241*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4242*0Sstevel@tonic-gate 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4243*0Sstevel@tonic-gate 		    ch_flt->flt_bit);
4244*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4245*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4246*0Sstevel@tonic-gate 	}
4247*0Sstevel@tonic-gate 
4248*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4249*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4250*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4251*0Sstevel@tonic-gate 	}
4252*0Sstevel@tonic-gate 
4253*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4254*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4255*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4256*0Sstevel@tonic-gate 	}
4257*0Sstevel@tonic-gate 
4258*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4259*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4260*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4261*0Sstevel@tonic-gate 	}
4262*0Sstevel@tonic-gate 
4263*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4264*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4265*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4266*0Sstevel@tonic-gate 	}
4267*0Sstevel@tonic-gate 
4268*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4269*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4270*0Sstevel@tonic-gate 		    DATA_TYPE_BOOLEAN_VALUE,
4271*0Sstevel@tonic-gate 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4272*0Sstevel@tonic-gate 	}
4273*0Sstevel@tonic-gate 
4274*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4275*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4276*0Sstevel@tonic-gate 		    DATA_TYPE_BOOLEAN_VALUE,
4277*0Sstevel@tonic-gate 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4278*0Sstevel@tonic-gate 	}
4279*0Sstevel@tonic-gate 
4280*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4281*0Sstevel@tonic-gate 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4282*0Sstevel@tonic-gate 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4283*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4284*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4285*0Sstevel@tonic-gate 	}
4286*0Sstevel@tonic-gate 
4287*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4288*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4289*0Sstevel@tonic-gate 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4290*0Sstevel@tonic-gate 	}
4291*0Sstevel@tonic-gate 
4292*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4293*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4294*0Sstevel@tonic-gate 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4295*0Sstevel@tonic-gate 	}
4296*0Sstevel@tonic-gate 
4297*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4298*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4299*0Sstevel@tonic-gate 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4300*0Sstevel@tonic-gate 	}
4301*0Sstevel@tonic-gate 
4302*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4303*0Sstevel@tonic-gate 		cpu_payload_add_ecache(aflt, payload);
4304*0Sstevel@tonic-gate 
4305*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4306*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4307*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4308*0Sstevel@tonic-gate 	}
4309*0Sstevel@tonic-gate 
4310*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4311*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4312*0Sstevel@tonic-gate 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4313*0Sstevel@tonic-gate 	}
4314*0Sstevel@tonic-gate 
4315*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4316*0Sstevel@tonic-gate 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4317*0Sstevel@tonic-gate 		    DATA_TYPE_UINT32_ARRAY, 16,
4318*0Sstevel@tonic-gate 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4319*0Sstevel@tonic-gate 	}
4320*0Sstevel@tonic-gate 
4321*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
4322*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4323*0Sstevel@tonic-gate 		cpu_payload_add_dcache(aflt, payload);
4324*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4325*0Sstevel@tonic-gate 		cpu_payload_add_icache(aflt, payload);
4326*0Sstevel@tonic-gate #endif	/* CPU_IMP_L1_CACHE_PARITY */
4327*0Sstevel@tonic-gate 
4328*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
4329*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4330*0Sstevel@tonic-gate 		cpu_payload_add_pcache(aflt, payload);
4331*0Sstevel@tonic-gate 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4332*0Sstevel@tonic-gate 		cpu_payload_add_tlb(aflt, payload);
4333*0Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
4334*0Sstevel@tonic-gate 	/*
4335*0Sstevel@tonic-gate 	 * Create the FMRI that goes into the payload
4336*0Sstevel@tonic-gate 	 * and contains the unum info if necessary.
4337*0Sstevel@tonic-gate 	 */
4338*0Sstevel@tonic-gate 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4339*0Sstevel@tonic-gate 	    (*afar_status == AFLT_STAT_VALID)) {
4340*0Sstevel@tonic-gate 		char unum[UNUM_NAMLEN];
4341*0Sstevel@tonic-gate 		int len;
4342*0Sstevel@tonic-gate 
4343*0Sstevel@tonic-gate 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4344*0Sstevel@tonic-gate 		    UNUM_NAMLEN, &len) == 0) {
4345*0Sstevel@tonic-gate 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4346*0Sstevel@tonic-gate 			    NULL, unum, NULL);
4347*0Sstevel@tonic-gate 			fm_payload_set(payload,
4348*0Sstevel@tonic-gate 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4349*0Sstevel@tonic-gate 			    DATA_TYPE_NVLIST, resource, NULL);
4350*0Sstevel@tonic-gate 		}
4351*0Sstevel@tonic-gate 	}
4352*0Sstevel@tonic-gate }
4353*0Sstevel@tonic-gate 
4354*0Sstevel@tonic-gate /*
4355*0Sstevel@tonic-gate  * Initialize the way info if necessary.
4356*0Sstevel@tonic-gate  */
4357*0Sstevel@tonic-gate void
4358*0Sstevel@tonic-gate cpu_ereport_init(struct async_flt *aflt)
4359*0Sstevel@tonic-gate {
4360*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4361*0Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4362*0Sstevel@tonic-gate 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4363*0Sstevel@tonic-gate 	int i;
4364*0Sstevel@tonic-gate 
4365*0Sstevel@tonic-gate 	/*
4366*0Sstevel@tonic-gate 	 * Initialize the info in the CPU logout structure.
4367*0Sstevel@tonic-gate 	 * The I$/D$ way information is not initialized here
4368*0Sstevel@tonic-gate 	 * since it is captured in the logout assembly code.
4369*0Sstevel@tonic-gate 	 */
4370*0Sstevel@tonic-gate 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4371*0Sstevel@tonic-gate 		(ecp + i)->ec_way = i;
4372*0Sstevel@tonic-gate 
4373*0Sstevel@tonic-gate 	for (i = 0; i < PN_L2_NWAYS; i++)
4374*0Sstevel@tonic-gate 		(l2p + i)->ec_way = i;
4375*0Sstevel@tonic-gate }
4376*0Sstevel@tonic-gate 
4377*0Sstevel@tonic-gate /*
4378*0Sstevel@tonic-gate  * Returns whether fault address is valid for this error bit and
4379*0Sstevel@tonic-gate  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4380*0Sstevel@tonic-gate  */
4381*0Sstevel@tonic-gate int
4382*0Sstevel@tonic-gate cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4383*0Sstevel@tonic-gate {
4384*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4385*0Sstevel@tonic-gate 
4386*0Sstevel@tonic-gate 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4387*0Sstevel@tonic-gate 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4388*0Sstevel@tonic-gate 	    AFLT_STAT_VALID &&
4389*0Sstevel@tonic-gate 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4390*0Sstevel@tonic-gate }
4391*0Sstevel@tonic-gate 
4392*0Sstevel@tonic-gate static void
4393*0Sstevel@tonic-gate cpu_log_diag_info(ch_async_flt_t *ch_flt)
4394*0Sstevel@tonic-gate {
4395*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4396*0Sstevel@tonic-gate 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4397*0Sstevel@tonic-gate 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4398*0Sstevel@tonic-gate 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4399*0Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
4400*0Sstevel@tonic-gate 	int i, nway;
4401*0Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
4402*0Sstevel@tonic-gate 
4403*0Sstevel@tonic-gate 	/*
4404*0Sstevel@tonic-gate 	 * Check if the CPU log out captured was valid.
4405*0Sstevel@tonic-gate 	 */
4406*0Sstevel@tonic-gate 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4407*0Sstevel@tonic-gate 	    ch_flt->flt_data_incomplete)
4408*0Sstevel@tonic-gate 		return;
4409*0Sstevel@tonic-gate 
4410*0Sstevel@tonic-gate #if defined(CPU_IMP_ECACHE_ASSOC)
4411*0Sstevel@tonic-gate 	nway = cpu_ecache_nway();
4412*0Sstevel@tonic-gate 	i =  cpu_ecache_line_valid(ch_flt);
4413*0Sstevel@tonic-gate 	if (i == 0 || i > nway) {
4414*0Sstevel@tonic-gate 		for (i = 0; i < nway; i++)
4415*0Sstevel@tonic-gate 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4416*0Sstevel@tonic-gate 	} else
4417*0Sstevel@tonic-gate 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4418*0Sstevel@tonic-gate #else /* CPU_IMP_ECACHE_ASSOC */
4419*0Sstevel@tonic-gate 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4420*0Sstevel@tonic-gate #endif /* CPU_IMP_ECACHE_ASSOC */
4421*0Sstevel@tonic-gate 
4422*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
4423*0Sstevel@tonic-gate 	pn_cpu_log_diag_l2_info(ch_flt);
4424*0Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
4425*0Sstevel@tonic-gate 
4426*0Sstevel@tonic-gate 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4427*0Sstevel@tonic-gate 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4428*0Sstevel@tonic-gate 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4429*0Sstevel@tonic-gate 	}
4430*0Sstevel@tonic-gate 
4431*0Sstevel@tonic-gate 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4432*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4433*0Sstevel@tonic-gate 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4434*0Sstevel@tonic-gate 		else
4435*0Sstevel@tonic-gate 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4436*0Sstevel@tonic-gate 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4437*0Sstevel@tonic-gate 	}
4438*0Sstevel@tonic-gate }
4439*0Sstevel@tonic-gate 
/*
 * Cheetah ECC calculation.
 *
 * One row per generated ECC bit.  Each row is a pair of 64-bit masks
 * selecting the data bits that contribute to that ECC bit: column 0
 * applies to the low-order 64 data bits and column 1 to the high-order
 * 64 data bits.  Only data-bit terms are needed; the check bit and Mtag
 * bit terms are ignored in the calculation.
 */
static uint64_t ch_ecc_table[9][2] = {
	/* low-order 64 bits	high-order 64 bits */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },
};
4460*0Sstevel@tonic-gate 
/*
 * Count the 1 bits in a 64-bit value.
 *
 * Each pass clears the lowest set bit (val &= val - 1), so the loop runs
 * once per set bit.  This is done in software rather than with the
 * UltraSPARC V9 POPC instruction because some CPUs, including
 * Cheetahplus and Jaguar, do not support that instruction.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;
		nbits++;
	}
	return (nbits);
}
4476*0Sstevel@tonic-gate 
4477*0Sstevel@tonic-gate /*
4478*0Sstevel@tonic-gate  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4479*0Sstevel@tonic-gate  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4480*0Sstevel@tonic-gate  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4481*0Sstevel@tonic-gate  * instead of doing all the xor's.
4482*0Sstevel@tonic-gate  */
4483*0Sstevel@tonic-gate uint32_t
4484*0Sstevel@tonic-gate us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4485*0Sstevel@tonic-gate {
4486*0Sstevel@tonic-gate 	int bitno, s;
4487*0Sstevel@tonic-gate 	int synd = 0;
4488*0Sstevel@tonic-gate 
4489*0Sstevel@tonic-gate 	for (bitno = 0; bitno < 9; bitno++) {
4490*0Sstevel@tonic-gate 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4491*0Sstevel@tonic-gate 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4492*0Sstevel@tonic-gate 		synd |= (s << bitno);
4493*0Sstevel@tonic-gate 	}
4494*0Sstevel@tonic-gate 	return (synd);
4495*0Sstevel@tonic-gate 
4496*0Sstevel@tonic-gate }
4497*0Sstevel@tonic-gate 
4498*0Sstevel@tonic-gate /*
4499*0Sstevel@tonic-gate  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4500*0Sstevel@tonic-gate  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4501*0Sstevel@tonic-gate  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4502*0Sstevel@tonic-gate  */
4503*0Sstevel@tonic-gate static void
4504*0Sstevel@tonic-gate cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4505*0Sstevel@tonic-gate     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4506*0Sstevel@tonic-gate {
4507*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4508*0Sstevel@tonic-gate 
4509*0Sstevel@tonic-gate 	if (reason &&
4510*0Sstevel@tonic-gate 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4511*0Sstevel@tonic-gate 		(void) strcat(reason, eccp->ec_reason);
4512*0Sstevel@tonic-gate 	}
4513*0Sstevel@tonic-gate 
4514*0Sstevel@tonic-gate 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4515*0Sstevel@tonic-gate 	ch_flt->flt_type = eccp->ec_flt_type;
4516*0Sstevel@tonic-gate 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4517*0Sstevel@tonic-gate 		ch_flt->flt_diag_data = *cdp;
4518*0Sstevel@tonic-gate 	else
4519*0Sstevel@tonic-gate 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4520*0Sstevel@tonic-gate 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4521*0Sstevel@tonic-gate 
4522*0Sstevel@tonic-gate 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4523*0Sstevel@tonic-gate 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4524*0Sstevel@tonic-gate 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4525*0Sstevel@tonic-gate 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4526*0Sstevel@tonic-gate 	else
4527*0Sstevel@tonic-gate 		aflt->flt_synd = 0;
4528*0Sstevel@tonic-gate 
4529*0Sstevel@tonic-gate 	aflt->flt_payload = eccp->ec_err_payload;
4530*0Sstevel@tonic-gate 
4531*0Sstevel@tonic-gate 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4532*0Sstevel@tonic-gate 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4533*0Sstevel@tonic-gate 		cpu_errorq_dispatch(eccp->ec_err_class,
4534*0Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4535*0Sstevel@tonic-gate 		    aflt->flt_panic);
4536*0Sstevel@tonic-gate 	else
4537*0Sstevel@tonic-gate 		cpu_errorq_dispatch(eccp->ec_err_class,
4538*0Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4539*0Sstevel@tonic-gate 		    aflt->flt_panic);
4540*0Sstevel@tonic-gate }
4541*0Sstevel@tonic-gate 
4542*0Sstevel@tonic-gate /*
4543*0Sstevel@tonic-gate  * Queue events on async event queue one event per error bit.  First we
4544*0Sstevel@tonic-gate  * queue the events that we "expect" for the given trap, then we queue events
4545*0Sstevel@tonic-gate  * that we may not expect.  Return number of events queued.
4546*0Sstevel@tonic-gate  */
4547*0Sstevel@tonic-gate int
4548*0Sstevel@tonic-gate cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4549*0Sstevel@tonic-gate     ch_cpu_logout_t *clop)
4550*0Sstevel@tonic-gate {
4551*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4552*0Sstevel@tonic-gate 	ecc_type_to_info_t *eccp;
4553*0Sstevel@tonic-gate 	int nevents = 0;
4554*0Sstevel@tonic-gate 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4555*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
4556*0Sstevel@tonic-gate 	uint64_t orig_t_afsr_errs;
4557*0Sstevel@tonic-gate #endif
4558*0Sstevel@tonic-gate 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4559*0Sstevel@tonic-gate 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4560*0Sstevel@tonic-gate 	ch_diag_data_t *cdp = NULL;
4561*0Sstevel@tonic-gate 
4562*0Sstevel@tonic-gate 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4563*0Sstevel@tonic-gate 
4564*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
4565*0Sstevel@tonic-gate 	orig_t_afsr_errs = t_afsr_errs;
4566*0Sstevel@tonic-gate 
4567*0Sstevel@tonic-gate 	/*
4568*0Sstevel@tonic-gate 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4569*0Sstevel@tonic-gate 	 */
4570*0Sstevel@tonic-gate 	if (clop != NULL) {
4571*0Sstevel@tonic-gate 		/*
4572*0Sstevel@tonic-gate 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4573*0Sstevel@tonic-gate 		 * flt_addr and flt_stat fields will be reset to the primaries
4574*0Sstevel@tonic-gate 		 * below, but the sdw_addr and sdw_stat will stay as the
4575*0Sstevel@tonic-gate 		 * secondaries.
4576*0Sstevel@tonic-gate 		 */
4577*0Sstevel@tonic-gate 		cdp = &clop->clo_sdw_data;
4578*0Sstevel@tonic-gate 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4579*0Sstevel@tonic-gate 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4580*0Sstevel@tonic-gate 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4581*0Sstevel@tonic-gate 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4582*0Sstevel@tonic-gate 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4583*0Sstevel@tonic-gate 
4584*0Sstevel@tonic-gate 		/*
4585*0Sstevel@tonic-gate 		 * If the primary and shadow AFSR differ, tag the shadow as
4586*0Sstevel@tonic-gate 		 * the first fault.
4587*0Sstevel@tonic-gate 		 */
4588*0Sstevel@tonic-gate 		if ((primary_afar != cdp->chd_afar) ||
4589*0Sstevel@tonic-gate 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4590*0Sstevel@tonic-gate 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4591*0Sstevel@tonic-gate 		}
4592*0Sstevel@tonic-gate 
4593*0Sstevel@tonic-gate 		/*
4594*0Sstevel@tonic-gate 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4595*0Sstevel@tonic-gate 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4596*0Sstevel@tonic-gate 		 * is expected to be zero for those CPUs which do not have
4597*0Sstevel@tonic-gate 		 * an AFSR_EXT register.
4598*0Sstevel@tonic-gate 		 */
4599*0Sstevel@tonic-gate 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4600*0Sstevel@tonic-gate 			if ((eccp->ec_afsr_bit &
4601*0Sstevel@tonic-gate 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4602*0Sstevel@tonic-gate 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4603*0Sstevel@tonic-gate 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4604*0Sstevel@tonic-gate 				cdp = NULL;
4605*0Sstevel@tonic-gate 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4606*0Sstevel@tonic-gate 				nevents++;
4607*0Sstevel@tonic-gate 			}
4608*0Sstevel@tonic-gate 		}
4609*0Sstevel@tonic-gate 
4610*0Sstevel@tonic-gate 		/*
4611*0Sstevel@tonic-gate 		 * If the ME bit is on in the primary AFSR turn all the
4612*0Sstevel@tonic-gate 		 * error bits on again that may set the ME bit to make
4613*0Sstevel@tonic-gate 		 * sure we see the ME AFSR error logs.
4614*0Sstevel@tonic-gate 		 */
4615*0Sstevel@tonic-gate 		if ((primary_afsr & C_AFSR_ME) != 0)
4616*0Sstevel@tonic-gate 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4617*0Sstevel@tonic-gate 	}
4618*0Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
4619*0Sstevel@tonic-gate 
4620*0Sstevel@tonic-gate 	if (clop != NULL)
4621*0Sstevel@tonic-gate 		cdp = &clop->clo_data;
4622*0Sstevel@tonic-gate 
4623*0Sstevel@tonic-gate 	/*
4624*0Sstevel@tonic-gate 	 * Queue expected errors, error bit and fault type must match
4625*0Sstevel@tonic-gate 	 * in the ecc_type_to_info table.
4626*0Sstevel@tonic-gate 	 */
4627*0Sstevel@tonic-gate 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4628*0Sstevel@tonic-gate 	    eccp++) {
4629*0Sstevel@tonic-gate 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4630*0Sstevel@tonic-gate 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4631*0Sstevel@tonic-gate #if defined(SERRANO)
4632*0Sstevel@tonic-gate 			/*
4633*0Sstevel@tonic-gate 			 * For FRC/FRU errors on Serrano the afar2 captures
4634*0Sstevel@tonic-gate 			 * the address and the associated data is
4635*0Sstevel@tonic-gate 			 * in the shadow logout area.
4636*0Sstevel@tonic-gate 			 */
4637*0Sstevel@tonic-gate 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4638*0Sstevel@tonic-gate 				if (clop != NULL)
4639*0Sstevel@tonic-gate 					cdp = &clop->clo_sdw_data;
4640*0Sstevel@tonic-gate 				aflt->flt_addr = ch_flt->afar2;
4641*0Sstevel@tonic-gate 			} else {
4642*0Sstevel@tonic-gate 				if (clop != NULL)
4643*0Sstevel@tonic-gate 					cdp = &clop->clo_data;
4644*0Sstevel@tonic-gate 				aflt->flt_addr = primary_afar;
4645*0Sstevel@tonic-gate 			}
4646*0Sstevel@tonic-gate #else	/* SERRANO */
4647*0Sstevel@tonic-gate 			aflt->flt_addr = primary_afar;
4648*0Sstevel@tonic-gate #endif	/* SERRANO */
4649*0Sstevel@tonic-gate 			aflt->flt_stat = primary_afsr;
4650*0Sstevel@tonic-gate 			ch_flt->afsr_ext = primary_afsr_ext;
4651*0Sstevel@tonic-gate 			ch_flt->afsr_errs = primary_afsr_errs;
4652*0Sstevel@tonic-gate 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4653*0Sstevel@tonic-gate 			cdp = NULL;
4654*0Sstevel@tonic-gate 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4655*0Sstevel@tonic-gate 			nevents++;
4656*0Sstevel@tonic-gate 		}
4657*0Sstevel@tonic-gate 	}
4658*0Sstevel@tonic-gate 
4659*0Sstevel@tonic-gate 	/*
4660*0Sstevel@tonic-gate 	 * Queue unexpected errors, error bit only match.
4661*0Sstevel@tonic-gate 	 */
4662*0Sstevel@tonic-gate 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4663*0Sstevel@tonic-gate 	    eccp++) {
4664*0Sstevel@tonic-gate 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4665*0Sstevel@tonic-gate #if defined(SERRANO)
4666*0Sstevel@tonic-gate 			/*
4667*0Sstevel@tonic-gate 			 * For FRC/FRU errors on Serrano the afar2 captures
4668*0Sstevel@tonic-gate 			 * the address and the associated data is
4669*0Sstevel@tonic-gate 			 * in the shadow logout area.
4670*0Sstevel@tonic-gate 			 */
4671*0Sstevel@tonic-gate 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4672*0Sstevel@tonic-gate 				if (clop != NULL)
4673*0Sstevel@tonic-gate 					cdp = &clop->clo_sdw_data;
4674*0Sstevel@tonic-gate 				aflt->flt_addr = ch_flt->afar2;
4675*0Sstevel@tonic-gate 			} else {
4676*0Sstevel@tonic-gate 				if (clop != NULL)
4677*0Sstevel@tonic-gate 					cdp = &clop->clo_data;
4678*0Sstevel@tonic-gate 				aflt->flt_addr = primary_afar;
4679*0Sstevel@tonic-gate 			}
4680*0Sstevel@tonic-gate #else	/* SERRANO */
4681*0Sstevel@tonic-gate 			aflt->flt_addr = primary_afar;
4682*0Sstevel@tonic-gate #endif	/* SERRANO */
4683*0Sstevel@tonic-gate 			aflt->flt_stat = primary_afsr;
4684*0Sstevel@tonic-gate 			ch_flt->afsr_ext = primary_afsr_ext;
4685*0Sstevel@tonic-gate 			ch_flt->afsr_errs = primary_afsr_errs;
4686*0Sstevel@tonic-gate 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4687*0Sstevel@tonic-gate 			cdp = NULL;
4688*0Sstevel@tonic-gate 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4689*0Sstevel@tonic-gate 			nevents++;
4690*0Sstevel@tonic-gate 		}
4691*0Sstevel@tonic-gate 	}
4692*0Sstevel@tonic-gate 	return (nevents);
4693*0Sstevel@tonic-gate }
4694*0Sstevel@tonic-gate 
4695*0Sstevel@tonic-gate /*
4696*0Sstevel@tonic-gate  * Return trap type number.
4697*0Sstevel@tonic-gate  */
4698*0Sstevel@tonic-gate uint8_t
4699*0Sstevel@tonic-gate flt_to_trap_type(struct async_flt *aflt)
4700*0Sstevel@tonic-gate {
4701*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_I_TRAP)
4702*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_I);
4703*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_D_TRAP)
4704*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_D);
4705*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_F_TRAP)
4706*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_F);
4707*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_C_TRAP)
4708*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_C);
4709*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_DP_TRAP)
4710*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_DP);
4711*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_IP_TRAP)
4712*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_IP);
4713*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_ITLB_TRAP)
4714*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_ITLB);
4715*0Sstevel@tonic-gate 	if (aflt->flt_status & ECC_DTLB_TRAP)
4716*0Sstevel@tonic-gate 		return (TRAP_TYPE_ECC_DTLB);
4717*0Sstevel@tonic-gate 	return (TRAP_TYPE_UNKNOWN);
4718*0Sstevel@tonic-gate }
4719*0Sstevel@tonic-gate 
4720*0Sstevel@tonic-gate /*
4721*0Sstevel@tonic-gate  * Decide an error type based on detector and leaky/partner tests.
4722*0Sstevel@tonic-gate  * The following array is used for quick translation - it must
4723*0Sstevel@tonic-gate  * stay in sync with ce_dispact_t.
4724*0Sstevel@tonic-gate  */
4725*0Sstevel@tonic-gate 
4726*0Sstevel@tonic-gate static char *cetypes[] = {
4727*0Sstevel@tonic-gate 	CE_DISP_DESC_U,
4728*0Sstevel@tonic-gate 	CE_DISP_DESC_I,
4729*0Sstevel@tonic-gate 	CE_DISP_DESC_PP,
4730*0Sstevel@tonic-gate 	CE_DISP_DESC_P,
4731*0Sstevel@tonic-gate 	CE_DISP_DESC_L,
4732*0Sstevel@tonic-gate 	CE_DISP_DESC_PS,
4733*0Sstevel@tonic-gate 	CE_DISP_DESC_S
4734*0Sstevel@tonic-gate };
4735*0Sstevel@tonic-gate 
4736*0Sstevel@tonic-gate char *
4737*0Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt)
4738*0Sstevel@tonic-gate {
4739*0Sstevel@tonic-gate 	ce_dispact_t dispact, disp;
4740*0Sstevel@tonic-gate 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4741*0Sstevel@tonic-gate 
4742*0Sstevel@tonic-gate 	/*
4743*0Sstevel@tonic-gate 	 * The memory payload bundle is shared by some events that do
4744*0Sstevel@tonic-gate 	 * not perform any classification.  For those flt_disp will be
4745*0Sstevel@tonic-gate 	 * 0 and we will return "unknown".
4746*0Sstevel@tonic-gate 	 */
4747*0Sstevel@tonic-gate 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4748*0Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
4749*0Sstevel@tonic-gate 
4750*0Sstevel@tonic-gate 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4751*0Sstevel@tonic-gate 
4752*0Sstevel@tonic-gate 	/*
4753*0Sstevel@tonic-gate 	 * It is also possible that no scrub/classification was performed
4754*0Sstevel@tonic-gate 	 * by the detector, for instance where a disrupting error logged
4755*0Sstevel@tonic-gate 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4756*0Sstevel@tonic-gate 	 */
4757*0Sstevel@tonic-gate 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4758*0Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
4759*0Sstevel@tonic-gate 
4760*0Sstevel@tonic-gate 	/*
4761*0Sstevel@tonic-gate 	 * Lookup type in initial classification/action table
4762*0Sstevel@tonic-gate 	 */
4763*0Sstevel@tonic-gate 	dispact = CE_DISPACT(ce_disp_table,
4764*0Sstevel@tonic-gate 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4765*0Sstevel@tonic-gate 	    CE_XDIAG_STATE(dtcrinfo),
4766*0Sstevel@tonic-gate 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4767*0Sstevel@tonic-gate 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4768*0Sstevel@tonic-gate 
4769*0Sstevel@tonic-gate 	/*
4770*0Sstevel@tonic-gate 	 * A bad lookup is not something to panic production systems for.
4771*0Sstevel@tonic-gate 	 */
4772*0Sstevel@tonic-gate 	ASSERT(dispact != CE_DISP_BAD);
4773*0Sstevel@tonic-gate 	if (dispact == CE_DISP_BAD)
4774*0Sstevel@tonic-gate 		return (cetypes[CE_DISP_UNKNOWN]);
4775*0Sstevel@tonic-gate 
4776*0Sstevel@tonic-gate 	disp = CE_DISP(dispact);
4777*0Sstevel@tonic-gate 
4778*0Sstevel@tonic-gate 	switch (disp) {
4779*0Sstevel@tonic-gate 	case CE_DISP_UNKNOWN:
4780*0Sstevel@tonic-gate 	case CE_DISP_INTERMITTENT:
4781*0Sstevel@tonic-gate 		break;
4782*0Sstevel@tonic-gate 
4783*0Sstevel@tonic-gate 	case CE_DISP_POSS_PERS:
4784*0Sstevel@tonic-gate 		/*
4785*0Sstevel@tonic-gate 		 * "Possible persistent" errors to which we have applied a valid
4786*0Sstevel@tonic-gate 		 * leaky test can be separated into "persistent" or "leaky".
4787*0Sstevel@tonic-gate 		 */
4788*0Sstevel@tonic-gate 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4789*0Sstevel@tonic-gate 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4790*0Sstevel@tonic-gate 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4791*0Sstevel@tonic-gate 			    CE_XDIAG_CE2SEEN(lkyinfo))
4792*0Sstevel@tonic-gate 				disp = CE_DISP_LEAKY;
4793*0Sstevel@tonic-gate 			else
4794*0Sstevel@tonic-gate 				disp = CE_DISP_PERS;
4795*0Sstevel@tonic-gate 		}
4796*0Sstevel@tonic-gate 		break;
4797*0Sstevel@tonic-gate 
4798*0Sstevel@tonic-gate 	case CE_DISP_POSS_STICKY:
4799*0Sstevel@tonic-gate 		/*
4800*0Sstevel@tonic-gate 		 * Promote "possible sticky" results that have been
4801*0Sstevel@tonic-gate 		 * confirmed by a partner test to "sticky".  Unconfirmed
4802*0Sstevel@tonic-gate 		 * "possible sticky" events are left at that status - we do not
4803*0Sstevel@tonic-gate 		 * guess at any bad reader/writer etc status here.
4804*0Sstevel@tonic-gate 		 */
4805*0Sstevel@tonic-gate 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4806*0Sstevel@tonic-gate 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4807*0Sstevel@tonic-gate 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4808*0Sstevel@tonic-gate 			disp = CE_DISP_STICKY;
4809*0Sstevel@tonic-gate 
4810*0Sstevel@tonic-gate 		/*
4811*0Sstevel@tonic-gate 		 * Promote "possible sticky" results on a uniprocessor
4812*0Sstevel@tonic-gate 		 * to "sticky"
4813*0Sstevel@tonic-gate 		 */
4814*0Sstevel@tonic-gate 		if (disp == CE_DISP_POSS_STICKY &&
4815*0Sstevel@tonic-gate 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4816*0Sstevel@tonic-gate 			disp = CE_DISP_STICKY;
4817*0Sstevel@tonic-gate 		break;
4818*0Sstevel@tonic-gate 
4819*0Sstevel@tonic-gate 	default:
4820*0Sstevel@tonic-gate 		disp = CE_DISP_UNKNOWN;
4821*0Sstevel@tonic-gate 		break;
4822*0Sstevel@tonic-gate 	}
4823*0Sstevel@tonic-gate 
4824*0Sstevel@tonic-gate 	return (cetypes[disp]);
4825*0Sstevel@tonic-gate }
4826*0Sstevel@tonic-gate 
4827*0Sstevel@tonic-gate /*
4828*0Sstevel@tonic-gate  * Given the entire afsr, the specific bit to check and a prioritized list of
4829*0Sstevel@tonic-gate  * error bits, determine the validity of the various overwrite priority
4830*0Sstevel@tonic-gate  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4831*0Sstevel@tonic-gate  * different overwrite priorities.
4832*0Sstevel@tonic-gate  *
4833*0Sstevel@tonic-gate  * Given a specific afsr error bit and the entire afsr, there are three cases:
4834*0Sstevel@tonic-gate  *   INVALID:	The specified bit is lower overwrite priority than some other
4835*0Sstevel@tonic-gate  *		error bit which is on in the afsr (or IVU/IVC).
4836*0Sstevel@tonic-gate  *   VALID:	The specified bit is higher priority than all other error bits
4837*0Sstevel@tonic-gate  *		which are on in the afsr.
4838*0Sstevel@tonic-gate  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4839*0Sstevel@tonic-gate  *		bit is on in the afsr.
4840*0Sstevel@tonic-gate  */
4841*0Sstevel@tonic-gate int
4842*0Sstevel@tonic-gate afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4843*0Sstevel@tonic-gate {
4844*0Sstevel@tonic-gate 	uint64_t afsr_ow;
4845*0Sstevel@tonic-gate 
4846*0Sstevel@tonic-gate 	while ((afsr_ow = *ow_bits++) != 0) {
4847*0Sstevel@tonic-gate 		/*
4848*0Sstevel@tonic-gate 		 * If bit is in the priority class, check to see if another
4849*0Sstevel@tonic-gate 		 * bit in the same class is on => ambiguous.  Otherwise,
4850*0Sstevel@tonic-gate 		 * the value is valid.  If the bit is not on at this priority
4851*0Sstevel@tonic-gate 		 * class, but a higher priority bit is on, then the value is
4852*0Sstevel@tonic-gate 		 * invalid.
4853*0Sstevel@tonic-gate 		 */
4854*0Sstevel@tonic-gate 		if (afsr_ow & afsr_bit) {
4855*0Sstevel@tonic-gate 			/*
4856*0Sstevel@tonic-gate 			 * If equal pri bit is on, ambiguous.
4857*0Sstevel@tonic-gate 			 */
4858*0Sstevel@tonic-gate 			if (afsr & (afsr_ow & ~afsr_bit))
4859*0Sstevel@tonic-gate 				return (AFLT_STAT_AMBIGUOUS);
4860*0Sstevel@tonic-gate 			return (AFLT_STAT_VALID);
4861*0Sstevel@tonic-gate 		} else if (afsr & afsr_ow)
4862*0Sstevel@tonic-gate 			break;
4863*0Sstevel@tonic-gate 	}
4864*0Sstevel@tonic-gate 
4865*0Sstevel@tonic-gate 	/*
4866*0Sstevel@tonic-gate 	 * We didn't find a match or a higher priority bit was on.  Not
4867*0Sstevel@tonic-gate 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4868*0Sstevel@tonic-gate 	 */
4869*0Sstevel@tonic-gate 	return (AFLT_STAT_INVALID);
4870*0Sstevel@tonic-gate }
4871*0Sstevel@tonic-gate 
4872*0Sstevel@tonic-gate static int
4873*0Sstevel@tonic-gate afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4874*0Sstevel@tonic-gate {
4875*0Sstevel@tonic-gate #if defined(SERRANO)
4876*0Sstevel@tonic-gate 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4877*0Sstevel@tonic-gate 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4878*0Sstevel@tonic-gate 	else
4879*0Sstevel@tonic-gate #endif	/* SERRANO */
4880*0Sstevel@tonic-gate 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4881*0Sstevel@tonic-gate }
4882*0Sstevel@tonic-gate 
4883*0Sstevel@tonic-gate static int
4884*0Sstevel@tonic-gate afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
4885*0Sstevel@tonic-gate {
4886*0Sstevel@tonic-gate 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
4887*0Sstevel@tonic-gate }
4888*0Sstevel@tonic-gate 
4889*0Sstevel@tonic-gate static int
4890*0Sstevel@tonic-gate afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
4891*0Sstevel@tonic-gate {
4892*0Sstevel@tonic-gate 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
4893*0Sstevel@tonic-gate }
4894*0Sstevel@tonic-gate 
4895*0Sstevel@tonic-gate static int
4896*0Sstevel@tonic-gate afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4897*0Sstevel@tonic-gate {
4898*0Sstevel@tonic-gate #ifdef lint
4899*0Sstevel@tonic-gate 	cpuid = cpuid;
4900*0Sstevel@tonic-gate #endif
4901*0Sstevel@tonic-gate 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4902*0Sstevel@tonic-gate 		return (afsr_to_msynd_status(afsr, afsr_bit));
4903*0Sstevel@tonic-gate 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4904*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
4905*0Sstevel@tonic-gate 		/*
4906*0Sstevel@tonic-gate 		 * The E_SYND overwrite policy is slightly different
4907*0Sstevel@tonic-gate 		 * for Panther CPUs.
4908*0Sstevel@tonic-gate 		 */
4909*0Sstevel@tonic-gate 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4910*0Sstevel@tonic-gate 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4911*0Sstevel@tonic-gate 		else
4912*0Sstevel@tonic-gate 			return (afsr_to_esynd_status(afsr, afsr_bit));
4913*0Sstevel@tonic-gate #else /* CHEETAH_PLUS */
4914*0Sstevel@tonic-gate 		return (afsr_to_esynd_status(afsr, afsr_bit));
4915*0Sstevel@tonic-gate #endif /* CHEETAH_PLUS */
4916*0Sstevel@tonic-gate 	} else {
4917*0Sstevel@tonic-gate 		return (AFLT_STAT_INVALID);
4918*0Sstevel@tonic-gate 	}
4919*0Sstevel@tonic-gate }
4920*0Sstevel@tonic-gate 
/*
 * Slave CPU stick synchronization.
 *
 * Runs in lock-step with sticksync_master() on another cpu, handing
 * control back and forth by spinning on stick_sync_cmd.  Each of the
 * stick_iter iterations measures two events:
 *  - Event A: started (timestamped) here, ended by the master
 *  - Event B: started by the master, ended (timestamped) here
 * The per-iteration skew is half the difference between the two event
 * durations, and the average over all iterations is compared against
 * stick_tsk.  If the average is too large the local STICK is adjusted
 * and the whole measurement is repeated; otherwise slave_done is set,
 * which also terminates the master's loop.
 *
 * Preemption is disabled for the duration of the exchange.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;
	int64_t		tskew;
	int64_t		av_tskew;

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew; EV_A_END and EV_B_START
			 * were filled in by the master while we waited
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
				- (timestamp[EV_A_END] -
				timestamp[EV_A_START])) / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.
		 */
		/* turn the running sum into an average; guard stick_iter 0 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the skew seen on each of the first attempts */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
				av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
5006*0Sstevel@tonic-gate 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Counterpart of sticksync_slave(): clears slave_done, releases the slave
 * by posting SLAVE_START, and then services stick_iter handshakes per
 * round until the slave sets slave_done.  All hand-offs are busy-waits
 * on stick_sync_cmd, and preemption is disabled throughout.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/*
		 * wait while slave calculates time skew; the slave moves
		 * stick_sync_cmd off SLAVE_CONT either by starting another
		 * round (MASTER_START) or by finishing (EVENT_NULL)
		 */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
5044*0Sstevel@tonic-gate 
/*
 * Intentionally a no-op.  Cheetah/Cheetah+ raise a disrupting error for
 * copybacks, so there is no need for the Spitfire hack of xcall'ing all
 * the cpus to ask them to check for such errors.  In addition, each cpu
 * checks for CPU events in cpu_async_panic_callb on its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5055*0Sstevel@tonic-gate 
/*
 * kmem cache that per-cpu cheetah_private_t structures are returned to by
 * cpu_uninit_private().
 * NOTE(review): presumably also the cache they are allocated from when a
 * cpu's private data is set up - confirm against the init path.
 */
struct kmem_cache *ch_private_cache;
5057*0Sstevel@tonic-gate 
/*
 * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 *
 * cp must have private data attached (asserted).  The scrubber is torn
 * down first, while the private data is still reachable through cp; the
 * cpu's pointers to the structure are then cleared before it is handed
 * back to ch_private_cache.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cheetah_private_t *chprp = CPU_PRIVATE(cp);

	ASSERT(chprp);
	cpu_uninit_ecache_scrub_dr(cp);
	/* detach the private data from the cpu before freeing it */
	CPU_PRIVATE(cp) = NULL;
	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
	kmem_cache_free(ch_private_cache, chprp);
	/* NOTE(review): presumably drops this cpu from CMP tracking */
	cmp_delete_cpu(cp->cpu_id);

}
5075*0Sstevel@tonic-gate 
5076*0Sstevel@tonic-gate /*
5077*0Sstevel@tonic-gate  * Cheetah Cache Scrubbing
5078*0Sstevel@tonic-gate  *
5079*0Sstevel@tonic-gate  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5080*0Sstevel@tonic-gate  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5081*0Sstevel@tonic-gate  * protected by either parity or ECC.
5082*0Sstevel@tonic-gate  *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
5087*0Sstevel@tonic-gate  */
5088*0Sstevel@tonic-gate 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 *
 * The ?cache_scrub_enable flags are referenced through the csi_enable
 * pointers in cache_scrub_info[] and are re-read by do_scrub() and
 * do_scrub_offline() every time a scrub is requested.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 * They are passed to add_softintr() by cpu_init_cache_scrub() when the
 * scrubber soft interrupts are registered.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */
5119*0Sstevel@tonic-gate 
5120*0Sstevel@tonic-gate 
#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 *
 * cpu_init_cache_scrub() copies the ?cache_calls_a_sec values into
 * cache_scrub_info[] and forces each copy into the range 1..hz.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 1;		/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 1;		/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 1;		/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 1;		/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 1;		/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 1;		/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

/*
 * Defaults for the remaining (plain Cheetah) builds: all three scrubbers
 * are called 100 times a second with a scan rate of 100.
 */
int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5165*0Sstevel@tonic-gate 
5166*0Sstevel@tonic-gate /*
5167*0Sstevel@tonic-gate  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5168*0Sstevel@tonic-gate  * increment the outstanding request counter and schedule a softint to run
5169*0Sstevel@tonic-gate  * the scrubber.
5170*0Sstevel@tonic-gate  */
5171*0Sstevel@tonic-gate extern xcfunc_t cache_scrubreq_tl1;
5172*0Sstevel@tonic-gate 
5173*0Sstevel@tonic-gate /*
5174*0Sstevel@tonic-gate  * These are the softint functions for each cache scrubber
5175*0Sstevel@tonic-gate  */
5176*0Sstevel@tonic-gate static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5177*0Sstevel@tonic-gate static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5178*0Sstevel@tonic-gate static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5179*0Sstevel@tonic-gate 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * The table is indexed by CACHE_SCRUBBER_INFO_E/_D/_I, so the initializers
 * below must stay in that order.  csi_freq, csi_inum and the two cyclic
 * IDs start out as 0 and are filled in by cpu_init_cache_scrub().
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint_t		csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	/* two characters plus the terminating NUL, e.g. "E$" */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
5203*0Sstevel@tonic-gate 
5204*0Sstevel@tonic-gate /*
5205*0Sstevel@tonic-gate  * If scrubbing is enabled, increment the outstanding request counter.  If it
5206*0Sstevel@tonic-gate  * is 1 (meaning there were no previous requests outstanding), call
5207*0Sstevel@tonic-gate  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5208*0Sstevel@tonic-gate  * a self trap.
5209*0Sstevel@tonic-gate  */
5210*0Sstevel@tonic-gate static void
5211*0Sstevel@tonic-gate do_scrub(struct scrub_info *csi)
5212*0Sstevel@tonic-gate {
5213*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5214*0Sstevel@tonic-gate 	int index = csi->csi_index;
5215*0Sstevel@tonic-gate 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5216*0Sstevel@tonic-gate 
5217*0Sstevel@tonic-gate 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5218*0Sstevel@tonic-gate 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5219*0Sstevel@tonic-gate 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5220*0Sstevel@tonic-gate 			    csi->csi_inum, 0);
5221*0Sstevel@tonic-gate 		}
5222*0Sstevel@tonic-gate 	}
5223*0Sstevel@tonic-gate }
5224*0Sstevel@tonic-gate 
5225*0Sstevel@tonic-gate /*
5226*0Sstevel@tonic-gate  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5227*0Sstevel@tonic-gate  * cross-trap the offline cpus.
5228*0Sstevel@tonic-gate  */
5229*0Sstevel@tonic-gate static void
5230*0Sstevel@tonic-gate do_scrub_offline(struct scrub_info *csi)
5231*0Sstevel@tonic-gate {
5232*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5233*0Sstevel@tonic-gate 
5234*0Sstevel@tonic-gate 	if (CPUSET_ISNULL(cpu_offline_set)) {
5235*0Sstevel@tonic-gate 		/*
5236*0Sstevel@tonic-gate 		 * No offline cpus - nothing to do
5237*0Sstevel@tonic-gate 		 */
5238*0Sstevel@tonic-gate 		return;
5239*0Sstevel@tonic-gate 	}
5240*0Sstevel@tonic-gate 
5241*0Sstevel@tonic-gate 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5242*0Sstevel@tonic-gate 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5243*0Sstevel@tonic-gate 		    csi->csi_index);
5244*0Sstevel@tonic-gate 	}
5245*0Sstevel@tonic-gate }
5246*0Sstevel@tonic-gate 
5247*0Sstevel@tonic-gate /*
5248*0Sstevel@tonic-gate  * This is the initial setup for the scrubber cyclics - it sets the
5249*0Sstevel@tonic-gate  * interrupt level, frequency, and function to call.
5250*0Sstevel@tonic-gate  */
5251*0Sstevel@tonic-gate /*ARGSUSED*/
5252*0Sstevel@tonic-gate static void
5253*0Sstevel@tonic-gate cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5254*0Sstevel@tonic-gate     cyc_time_t *when)
5255*0Sstevel@tonic-gate {
5256*0Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg;
5257*0Sstevel@tonic-gate 
5258*0Sstevel@tonic-gate 	ASSERT(csi != NULL);
5259*0Sstevel@tonic-gate 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5260*0Sstevel@tonic-gate 	hdlr->cyh_level = CY_LOW_LEVEL;
5261*0Sstevel@tonic-gate 	hdlr->cyh_arg = arg;
5262*0Sstevel@tonic-gate 
5263*0Sstevel@tonic-gate 	when->cyt_when = 0;	/* Start immediately */
5264*0Sstevel@tonic-gate 	when->cyt_interval = NANOSEC / csi->csi_freq;
5265*0Sstevel@tonic-gate }
5266*0Sstevel@tonic-gate 
5267*0Sstevel@tonic-gate /*
5268*0Sstevel@tonic-gate  * Initialization for cache scrubbing.
5269*0Sstevel@tonic-gate  * This routine is called AFTER all cpus have had cpu_init_private called
5270*0Sstevel@tonic-gate  * to initialize their private data areas.
5271*0Sstevel@tonic-gate  */
5272*0Sstevel@tonic-gate void
5273*0Sstevel@tonic-gate cpu_init_cache_scrub(void)
5274*0Sstevel@tonic-gate {
5275*0Sstevel@tonic-gate 	int i;
5276*0Sstevel@tonic-gate 	struct scrub_info *csi;
5277*0Sstevel@tonic-gate 	cyc_omni_handler_t omni_hdlr;
5278*0Sstevel@tonic-gate 	cyc_handler_t offline_hdlr;
5279*0Sstevel@tonic-gate 	cyc_time_t when;
5280*0Sstevel@tonic-gate 
5281*0Sstevel@tonic-gate 	/*
5282*0Sstevel@tonic-gate 	 * save away the maximum number of lines for the D$
5283*0Sstevel@tonic-gate 	 */
5284*0Sstevel@tonic-gate 	dcache_nlines = dcache_size / dcache_linesize;
5285*0Sstevel@tonic-gate 
5286*0Sstevel@tonic-gate 	/*
5287*0Sstevel@tonic-gate 	 * register the softints for the cache scrubbing
5288*0Sstevel@tonic-gate 	 */
5289*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5290*0Sstevel@tonic-gate 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5291*0Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5292*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5293*0Sstevel@tonic-gate 
5294*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5295*0Sstevel@tonic-gate 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5296*0Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5297*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5298*0Sstevel@tonic-gate 
5299*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5300*0Sstevel@tonic-gate 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5301*0Sstevel@tonic-gate 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5302*0Sstevel@tonic-gate 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5303*0Sstevel@tonic-gate 
5304*0Sstevel@tonic-gate 	/*
5305*0Sstevel@tonic-gate 	 * start the scrubbing for all the caches
5306*0Sstevel@tonic-gate 	 */
5307*0Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
5308*0Sstevel@tonic-gate 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5309*0Sstevel@tonic-gate 
5310*0Sstevel@tonic-gate 		csi = &cache_scrub_info[i];
5311*0Sstevel@tonic-gate 
5312*0Sstevel@tonic-gate 		if (!(*csi->csi_enable))
5313*0Sstevel@tonic-gate 			continue;
5314*0Sstevel@tonic-gate 
5315*0Sstevel@tonic-gate 		/*
5316*0Sstevel@tonic-gate 		 * force the following to be true:
5317*0Sstevel@tonic-gate 		 *	1 <= calls_a_sec <= hz
5318*0Sstevel@tonic-gate 		 */
5319*0Sstevel@tonic-gate 		if (csi->csi_freq > hz) {
5320*0Sstevel@tonic-gate 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5321*0Sstevel@tonic-gate 				"(%d); resetting to hz (%d)", csi->csi_name,
5322*0Sstevel@tonic-gate 				csi->csi_freq, hz);
5323*0Sstevel@tonic-gate 			csi->csi_freq = hz;
5324*0Sstevel@tonic-gate 		} else if (csi->csi_freq < 1) {
5325*0Sstevel@tonic-gate 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5326*0Sstevel@tonic-gate 				"(%d); resetting to 1", csi->csi_name,
5327*0Sstevel@tonic-gate 				csi->csi_freq);
5328*0Sstevel@tonic-gate 			csi->csi_freq = 1;
5329*0Sstevel@tonic-gate 		}
5330*0Sstevel@tonic-gate 
5331*0Sstevel@tonic-gate 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5332*0Sstevel@tonic-gate 		omni_hdlr.cyo_offline = NULL;
5333*0Sstevel@tonic-gate 		omni_hdlr.cyo_arg = (void *)csi;
5334*0Sstevel@tonic-gate 
5335*0Sstevel@tonic-gate 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5336*0Sstevel@tonic-gate 		offline_hdlr.cyh_arg = (void *)csi;
5337*0Sstevel@tonic-gate 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5338*0Sstevel@tonic-gate 
5339*0Sstevel@tonic-gate 		when.cyt_when = 0;	/* Start immediately */
5340*0Sstevel@tonic-gate 		when.cyt_interval = NANOSEC / csi->csi_freq;
5341*0Sstevel@tonic-gate 
5342*0Sstevel@tonic-gate 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5343*0Sstevel@tonic-gate 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5344*0Sstevel@tonic-gate 	}
5345*0Sstevel@tonic-gate 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5346*0Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
5347*0Sstevel@tonic-gate }
5348*0Sstevel@tonic-gate 
5349*0Sstevel@tonic-gate /*
5350*0Sstevel@tonic-gate  * Indicate that the specified cpu is idle.
5351*0Sstevel@tonic-gate  */
5352*0Sstevel@tonic-gate void
5353*0Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
5354*0Sstevel@tonic-gate {
5355*0Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
5356*0Sstevel@tonic-gate 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5357*0Sstevel@tonic-gate 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5358*0Sstevel@tonic-gate 	}
5359*0Sstevel@tonic-gate }
5360*0Sstevel@tonic-gate 
5361*0Sstevel@tonic-gate /*
5362*0Sstevel@tonic-gate  * Indicate that the specified cpu is busy.
5363*0Sstevel@tonic-gate  */
5364*0Sstevel@tonic-gate void
5365*0Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
5366*0Sstevel@tonic-gate {
5367*0Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
5368*0Sstevel@tonic-gate 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5369*0Sstevel@tonic-gate 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5370*0Sstevel@tonic-gate 	}
5371*0Sstevel@tonic-gate }
5372*0Sstevel@tonic-gate 
5373*0Sstevel@tonic-gate /*
5374*0Sstevel@tonic-gate  * Initialization for cache scrubbing for the specified cpu.
5375*0Sstevel@tonic-gate  */
5376*0Sstevel@tonic-gate void
5377*0Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
5378*0Sstevel@tonic-gate {
5379*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5380*0Sstevel@tonic-gate 	int cpuid = cp->cpu_id;
5381*0Sstevel@tonic-gate 
5382*0Sstevel@tonic-gate 	/* initialize the number of lines in the caches */
5383*0Sstevel@tonic-gate 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5384*0Sstevel@tonic-gate 	    cpunodes[cpuid].ecache_linesize;
5385*0Sstevel@tonic-gate 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5386*0Sstevel@tonic-gate 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5387*0Sstevel@tonic-gate 
5388*0Sstevel@tonic-gate 	/*
5389*0Sstevel@tonic-gate 	 * do_scrub() and do_scrub_offline() check both the global
5390*0Sstevel@tonic-gate 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5391*0Sstevel@tonic-gate 	 * check this value before scrubbing.  Currently, we use it to
5392*0Sstevel@tonic-gate 	 * disable the E$ scrubber on multi-core cpus or while running at
5393*0Sstevel@tonic-gate 	 * slowed speed.  For now, just turn everything on and allow
5394*0Sstevel@tonic-gate 	 * cpu_init_private() to change it if necessary.
5395*0Sstevel@tonic-gate 	 */
5396*0Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5397*0Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5398*0Sstevel@tonic-gate 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5399*0Sstevel@tonic-gate 
5400*0Sstevel@tonic-gate 	cpu_busy_ecache_scrub(cp);
5401*0Sstevel@tonic-gate }
5402*0Sstevel@tonic-gate 
5403*0Sstevel@tonic-gate /*
5404*0Sstevel@tonic-gate  * Un-initialization for cache scrubbing for the specified cpu.
5405*0Sstevel@tonic-gate  */
5406*0Sstevel@tonic-gate static void
5407*0Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5408*0Sstevel@tonic-gate {
5409*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5410*0Sstevel@tonic-gate 
5411*0Sstevel@tonic-gate 	/*
5412*0Sstevel@tonic-gate 	 * un-initialize bookkeeping for cache scrubbing
5413*0Sstevel@tonic-gate 	 */
5414*0Sstevel@tonic-gate 	bzero(csmp, sizeof (ch_scrub_misc_t));
5415*0Sstevel@tonic-gate 
5416*0Sstevel@tonic-gate 	cpu_idle_ecache_scrub(cp);
5417*0Sstevel@tonic-gate }
5418*0Sstevel@tonic-gate 
5419*0Sstevel@tonic-gate /*
5420*0Sstevel@tonic-gate  * Called periodically on each CPU to scrub the D$.
5421*0Sstevel@tonic-gate  */
5422*0Sstevel@tonic-gate static void
5423*0Sstevel@tonic-gate scrub_dcache(int how_many)
5424*0Sstevel@tonic-gate {
5425*0Sstevel@tonic-gate 	int i;
5426*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5427*0Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5428*0Sstevel@tonic-gate 
5429*0Sstevel@tonic-gate 	/*
5430*0Sstevel@tonic-gate 	 * scrub the desired number of lines
5431*0Sstevel@tonic-gate 	 */
5432*0Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
5433*0Sstevel@tonic-gate 		/*
5434*0Sstevel@tonic-gate 		 * scrub a D$ line
5435*0Sstevel@tonic-gate 		 */
5436*0Sstevel@tonic-gate 		dcache_inval_line(index);
5437*0Sstevel@tonic-gate 
5438*0Sstevel@tonic-gate 		/*
5439*0Sstevel@tonic-gate 		 * calculate the next D$ line to scrub, assumes
5440*0Sstevel@tonic-gate 		 * that dcache_nlines is a power of 2
5441*0Sstevel@tonic-gate 		 */
5442*0Sstevel@tonic-gate 		index = (index + 1) & (dcache_nlines - 1);
5443*0Sstevel@tonic-gate 	}
5444*0Sstevel@tonic-gate 
5445*0Sstevel@tonic-gate 	/*
5446*0Sstevel@tonic-gate 	 * set the scrub index for the next visit
5447*0Sstevel@tonic-gate 	 */
5448*0Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5449*0Sstevel@tonic-gate }
5450*0Sstevel@tonic-gate 
5451*0Sstevel@tonic-gate /*
5452*0Sstevel@tonic-gate  * Handler for D$ scrub inum softint. Call scrub_dcache until
5453*0Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
5454*0Sstevel@tonic-gate  */
5455*0Sstevel@tonic-gate /*ARGSUSED*/
5456*0Sstevel@tonic-gate static uint_t
5457*0Sstevel@tonic-gate scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5458*0Sstevel@tonic-gate {
5459*0Sstevel@tonic-gate 	int i;
5460*0Sstevel@tonic-gate 	int how_many;
5461*0Sstevel@tonic-gate 	int outstanding;
5462*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5463*0Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5464*0Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
5465*0Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5466*0Sstevel@tonic-gate 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5467*0Sstevel@tonic-gate 
5468*0Sstevel@tonic-gate 	/*
5469*0Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
5470*0Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
5471*0Sstevel@tonic-gate 	 * cache is scanned every second.
5472*0Sstevel@tonic-gate 	 */
5473*0Sstevel@tonic-gate 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5474*0Sstevel@tonic-gate 
5475*0Sstevel@tonic-gate 	do {
5476*0Sstevel@tonic-gate 		outstanding = *countp;
5477*0Sstevel@tonic-gate 		ASSERT(outstanding > 0);
5478*0Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
5479*0Sstevel@tonic-gate 			scrub_dcache(how_many);
5480*0Sstevel@tonic-gate 		}
5481*0Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
5482*0Sstevel@tonic-gate 
5483*0Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
5484*0Sstevel@tonic-gate }
5485*0Sstevel@tonic-gate 
5486*0Sstevel@tonic-gate /*
5487*0Sstevel@tonic-gate  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5488*0Sstevel@tonic-gate  * by invalidating lines. Due to the characteristics of the ASI which
5489*0Sstevel@tonic-gate  * is used to invalidate an I$ line, the entire I$ must be invalidated
5490*0Sstevel@tonic-gate  * vs. an individual I$ line.
5491*0Sstevel@tonic-gate  */
5492*0Sstevel@tonic-gate static void
5493*0Sstevel@tonic-gate scrub_icache(int how_many)
5494*0Sstevel@tonic-gate {
5495*0Sstevel@tonic-gate 	int i;
5496*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5497*0Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5498*0Sstevel@tonic-gate 	int icache_nlines = csmp->chsm_icache_nlines;
5499*0Sstevel@tonic-gate 
5500*0Sstevel@tonic-gate 	/*
5501*0Sstevel@tonic-gate 	 * scrub the desired number of lines
5502*0Sstevel@tonic-gate 	 */
5503*0Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
5504*0Sstevel@tonic-gate 		/*
5505*0Sstevel@tonic-gate 		 * since the entire I$ must be scrubbed at once,
5506*0Sstevel@tonic-gate 		 * wait until the index wraps to zero to invalidate
5507*0Sstevel@tonic-gate 		 * the entire I$
5508*0Sstevel@tonic-gate 		 */
5509*0Sstevel@tonic-gate 		if (index == 0) {
5510*0Sstevel@tonic-gate 			icache_inval_all();
5511*0Sstevel@tonic-gate 		}
5512*0Sstevel@tonic-gate 
5513*0Sstevel@tonic-gate 		/*
5514*0Sstevel@tonic-gate 		 * calculate the next I$ line to scrub, assumes
5515*0Sstevel@tonic-gate 		 * that chsm_icache_nlines is a power of 2
5516*0Sstevel@tonic-gate 		 */
5517*0Sstevel@tonic-gate 		index = (index + 1) & (icache_nlines - 1);
5518*0Sstevel@tonic-gate 	}
5519*0Sstevel@tonic-gate 
5520*0Sstevel@tonic-gate 	/*
5521*0Sstevel@tonic-gate 	 * set the scrub index for the next visit
5522*0Sstevel@tonic-gate 	 */
5523*0Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5524*0Sstevel@tonic-gate }
5525*0Sstevel@tonic-gate 
5526*0Sstevel@tonic-gate /*
5527*0Sstevel@tonic-gate  * Handler for I$ scrub inum softint. Call scrub_icache until
5528*0Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
5529*0Sstevel@tonic-gate  */
5530*0Sstevel@tonic-gate /*ARGSUSED*/
5531*0Sstevel@tonic-gate static uint_t
5532*0Sstevel@tonic-gate scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5533*0Sstevel@tonic-gate {
5534*0Sstevel@tonic-gate 	int i;
5535*0Sstevel@tonic-gate 	int how_many;
5536*0Sstevel@tonic-gate 	int outstanding;
5537*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5538*0Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5539*0Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
5540*0Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5541*0Sstevel@tonic-gate 	    icache_scan_rate_idle : icache_scan_rate_busy;
5542*0Sstevel@tonic-gate 	int icache_nlines = csmp->chsm_icache_nlines;
5543*0Sstevel@tonic-gate 
5544*0Sstevel@tonic-gate 	/*
5545*0Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
5546*0Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
5547*0Sstevel@tonic-gate 	 * cache is scanned every second.
5548*0Sstevel@tonic-gate 	 */
5549*0Sstevel@tonic-gate 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5550*0Sstevel@tonic-gate 
5551*0Sstevel@tonic-gate 	do {
5552*0Sstevel@tonic-gate 		outstanding = *countp;
5553*0Sstevel@tonic-gate 		ASSERT(outstanding > 0);
5554*0Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
5555*0Sstevel@tonic-gate 			scrub_icache(how_many);
5556*0Sstevel@tonic-gate 		}
5557*0Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
5558*0Sstevel@tonic-gate 
5559*0Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
5560*0Sstevel@tonic-gate }
5561*0Sstevel@tonic-gate 
5562*0Sstevel@tonic-gate /*
5563*0Sstevel@tonic-gate  * Called periodically on each CPU to scrub the E$.
5564*0Sstevel@tonic-gate  */
5565*0Sstevel@tonic-gate static void
5566*0Sstevel@tonic-gate scrub_ecache(int how_many)
5567*0Sstevel@tonic-gate {
5568*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5569*0Sstevel@tonic-gate 	int i;
5570*0Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
5571*0Sstevel@tonic-gate 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5572*0Sstevel@tonic-gate 	int nlines = csmp->chsm_ecache_nlines;
5573*0Sstevel@tonic-gate 	int linesize = cpunodes[cpuid].ecache_linesize;
5574*0Sstevel@tonic-gate 	int ec_set_size = cpu_ecache_set_size(CPU);
5575*0Sstevel@tonic-gate 
5576*0Sstevel@tonic-gate 	/*
5577*0Sstevel@tonic-gate 	 * scrub the desired number of lines
5578*0Sstevel@tonic-gate 	 */
5579*0Sstevel@tonic-gate 	for (i = 0; i < how_many; i++) {
5580*0Sstevel@tonic-gate 		/*
5581*0Sstevel@tonic-gate 		 * scrub the E$ line
5582*0Sstevel@tonic-gate 		 */
5583*0Sstevel@tonic-gate 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5584*0Sstevel@tonic-gate 		    ec_set_size);
5585*0Sstevel@tonic-gate 
5586*0Sstevel@tonic-gate 		/*
5587*0Sstevel@tonic-gate 		 * calculate the next E$ line to scrub based on twice
5588*0Sstevel@tonic-gate 		 * the number of E$ lines (to displace lines containing
5589*0Sstevel@tonic-gate 		 * flush area data), assumes that the number of lines
5590*0Sstevel@tonic-gate 		 * is a power of 2
5591*0Sstevel@tonic-gate 		 */
5592*0Sstevel@tonic-gate 		index = (index + 1) & ((nlines << 1) - 1);
5593*0Sstevel@tonic-gate 	}
5594*0Sstevel@tonic-gate 
5595*0Sstevel@tonic-gate 	/*
5596*0Sstevel@tonic-gate 	 * set the ecache scrub index for the next visit
5597*0Sstevel@tonic-gate 	 */
5598*0Sstevel@tonic-gate 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5599*0Sstevel@tonic-gate }
5600*0Sstevel@tonic-gate 
5601*0Sstevel@tonic-gate /*
5602*0Sstevel@tonic-gate  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5603*0Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
5604*0Sstevel@tonic-gate  */
5605*0Sstevel@tonic-gate /*ARGSUSED*/
5606*0Sstevel@tonic-gate static uint_t
5607*0Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5608*0Sstevel@tonic-gate {
5609*0Sstevel@tonic-gate 	int i;
5610*0Sstevel@tonic-gate 	int how_many;
5611*0Sstevel@tonic-gate 	int outstanding;
5612*0Sstevel@tonic-gate 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5613*0Sstevel@tonic-gate 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5614*0Sstevel@tonic-gate 	struct scrub_info *csi = (struct scrub_info *)arg1;
5615*0Sstevel@tonic-gate 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5616*0Sstevel@tonic-gate 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5617*0Sstevel@tonic-gate 	int ecache_nlines = csmp->chsm_ecache_nlines;
5618*0Sstevel@tonic-gate 
5619*0Sstevel@tonic-gate 	/*
5620*0Sstevel@tonic-gate 	 * The scan rates are expressed in units of tenths of a
5621*0Sstevel@tonic-gate 	 * percent.  A scan rate of 1000 (100%) means the whole
5622*0Sstevel@tonic-gate 	 * cache is scanned every second.
5623*0Sstevel@tonic-gate 	 */
5624*0Sstevel@tonic-gate 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5625*0Sstevel@tonic-gate 
5626*0Sstevel@tonic-gate 	do {
5627*0Sstevel@tonic-gate 		outstanding = *countp;
5628*0Sstevel@tonic-gate 		ASSERT(outstanding > 0);
5629*0Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++) {
5630*0Sstevel@tonic-gate 			scrub_ecache(how_many);
5631*0Sstevel@tonic-gate 		}
5632*0Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
5633*0Sstevel@tonic-gate 
5634*0Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
5635*0Sstevel@tonic-gate }
5636*0Sstevel@tonic-gate 
5637*0Sstevel@tonic-gate /*
5638*0Sstevel@tonic-gate  * Timeout function to reenable CE
5639*0Sstevel@tonic-gate  */
5640*0Sstevel@tonic-gate static void
5641*0Sstevel@tonic-gate cpu_delayed_check_ce_errors(void *arg)
5642*0Sstevel@tonic-gate {
5643*0Sstevel@tonic-gate 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5644*0Sstevel@tonic-gate 	    TQ_NOSLEEP)) {
5645*0Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5646*0Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5647*0Sstevel@tonic-gate 	}
5648*0Sstevel@tonic-gate }
5649*0Sstevel@tonic-gate 
5650*0Sstevel@tonic-gate /*
5651*0Sstevel@tonic-gate  * CE Deferred Re-enable after trap.
5652*0Sstevel@tonic-gate  *
5653*0Sstevel@tonic-gate  * When the CPU gets a disrupting trap for any of the errors
5654*0Sstevel@tonic-gate  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5655*0Sstevel@tonic-gate  * immediately. To eliminate the possibility of multiple CEs causing
5656*0Sstevel@tonic-gate  * recursive stack overflow in the trap handler, we cannot
5657*0Sstevel@tonic-gate  * reenable CEEN while still running in the trap handler. Instead,
5658*0Sstevel@tonic-gate  * after a CE is logged on a CPU, we schedule a timeout function,
5659*0Sstevel@tonic-gate  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5660*0Sstevel@tonic-gate  * seconds. This function will check whether any further CEs
5661*0Sstevel@tonic-gate  * have occurred on that CPU, and if none have, will reenable CEEN.
5662*0Sstevel@tonic-gate  *
5663*0Sstevel@tonic-gate  * If further CEs have occurred while CEEN is disabled, another
5664*0Sstevel@tonic-gate  * timeout will be scheduled. This is to ensure that the CPU can
5665*0Sstevel@tonic-gate  * make progress in the face of CE 'storms', and that it does not
5666*0Sstevel@tonic-gate  * spend all its time logging CE errors.
5667*0Sstevel@tonic-gate  */
5668*0Sstevel@tonic-gate static void
5669*0Sstevel@tonic-gate cpu_check_ce_errors(void *arg)
5670*0Sstevel@tonic-gate {
5671*0Sstevel@tonic-gate 	int	cpuid = (int)arg;
5672*0Sstevel@tonic-gate 	cpu_t	*cp;
5673*0Sstevel@tonic-gate 
5674*0Sstevel@tonic-gate 	/*
5675*0Sstevel@tonic-gate 	 * We acquire cpu_lock.
5676*0Sstevel@tonic-gate 	 */
5677*0Sstevel@tonic-gate 	ASSERT(curthread->t_pil == 0);
5678*0Sstevel@tonic-gate 
5679*0Sstevel@tonic-gate 	/*
5680*0Sstevel@tonic-gate 	 * verify that the cpu is still around, DR
5681*0Sstevel@tonic-gate 	 * could have got there first ...
5682*0Sstevel@tonic-gate 	 */
5683*0Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
5684*0Sstevel@tonic-gate 	cp = cpu_get(cpuid);
5685*0Sstevel@tonic-gate 	if (cp == NULL) {
5686*0Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
5687*0Sstevel@tonic-gate 		return;
5688*0Sstevel@tonic-gate 	}
5689*0Sstevel@tonic-gate 	/*
5690*0Sstevel@tonic-gate 	 * make sure we don't migrate across CPUs
5691*0Sstevel@tonic-gate 	 * while checking our CE status.
5692*0Sstevel@tonic-gate 	 */
5693*0Sstevel@tonic-gate 	kpreempt_disable();
5694*0Sstevel@tonic-gate 
5695*0Sstevel@tonic-gate 	/*
5696*0Sstevel@tonic-gate 	 * If we are running on the CPU that got the
5697*0Sstevel@tonic-gate 	 * CE, we can do the checks directly.
5698*0Sstevel@tonic-gate 	 */
5699*0Sstevel@tonic-gate 	if (cp->cpu_id == CPU->cpu_id) {
5700*0Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
5701*0Sstevel@tonic-gate 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5702*0Sstevel@tonic-gate 		kpreempt_enable();
5703*0Sstevel@tonic-gate 		return;
5704*0Sstevel@tonic-gate 	}
5705*0Sstevel@tonic-gate 	kpreempt_enable();
5706*0Sstevel@tonic-gate 
5707*0Sstevel@tonic-gate 	/*
5708*0Sstevel@tonic-gate 	 * send an x-call to get the CPU that originally
5709*0Sstevel@tonic-gate 	 * got the CE to do the necessary checks. If we can't
5710*0Sstevel@tonic-gate 	 * send the x-call, reschedule the timeout, otherwise we
5711*0Sstevel@tonic-gate 	 * lose CEEN forever on that CPU.
5712*0Sstevel@tonic-gate 	 */
5713*0Sstevel@tonic-gate 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5714*0Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5715*0Sstevel@tonic-gate 		    TIMEOUT_CEEN_CHECK, 0);
5716*0Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
5717*0Sstevel@tonic-gate 	} else {
5718*0Sstevel@tonic-gate 		/*
5719*0Sstevel@tonic-gate 		 * When the CPU is not accepting xcalls, or
5720*0Sstevel@tonic-gate 		 * the processor is offlined, we don't want to
5721*0Sstevel@tonic-gate 		 * incur the extra overhead of trying to schedule the
5722*0Sstevel@tonic-gate 		 * CE timeout indefinitely. However, we don't want to lose
5723*0Sstevel@tonic-gate 		 * CE checking forever.
5724*0Sstevel@tonic-gate 		 *
5725*0Sstevel@tonic-gate 		 * Keep rescheduling the timeout, accepting the additional
5726*0Sstevel@tonic-gate 		 * overhead as the cost of correctness in the case where we get
5727*0Sstevel@tonic-gate 		 * a CE, disable CEEN, offline the CPU during the
5728*0Sstevel@tonic-gate 		 * the timeout interval, and then online it at some
5729*0Sstevel@tonic-gate 		 * point in the future. This is unlikely given the short
5730*0Sstevel@tonic-gate 		 * cpu_ceen_delay_secs.
5731*0Sstevel@tonic-gate 		 */
5732*0Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
5733*0Sstevel@tonic-gate 		(void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id,
5734*0Sstevel@tonic-gate 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5735*0Sstevel@tonic-gate 	}
5736*0Sstevel@tonic-gate }
5737*0Sstevel@tonic-gate 
5738*0Sstevel@tonic-gate /*
5739*0Sstevel@tonic-gate  * This routine will check whether CEs have occurred while
5740*0Sstevel@tonic-gate  * CEEN is disabled. Any CEs detected will be logged and, if
5741*0Sstevel@tonic-gate  * possible, scrubbed.
5742*0Sstevel@tonic-gate  *
5743*0Sstevel@tonic-gate  * The memscrubber will also use this routine to clear any errors
5744*0Sstevel@tonic-gate  * caused by its scrubbing with CEEN disabled.
5745*0Sstevel@tonic-gate  *
5746*0Sstevel@tonic-gate  * flag == SCRUBBER_CEEN_CHECK
5747*0Sstevel@tonic-gate  *		called from memscrubber, just check/scrub, no reset
5748*0Sstevel@tonic-gate  *		paddr 	physical addr. for start of scrub pages
5749*0Sstevel@tonic-gate  *		vaddr 	virtual addr. for scrub area
5750*0Sstevel@tonic-gate  *		psz	page size of area to be scrubbed
5751*0Sstevel@tonic-gate  *
5752*0Sstevel@tonic-gate  * flag == TIMEOUT_CEEN_CHECK
5753*0Sstevel@tonic-gate  *		timeout function has triggered, reset timeout or CEEN
5754*0Sstevel@tonic-gate  *
5755*0Sstevel@tonic-gate  * Note: We must not migrate cpus during this function.  This can be
5756*0Sstevel@tonic-gate  * achieved by one of:
5757*0Sstevel@tonic-gate  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5758*0Sstevel@tonic-gate  *	The flag value must be first xcall argument.
5759*0Sstevel@tonic-gate  *    - disabling kernel preemption.  This should be done for very short
5760*0Sstevel@tonic-gate  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5761*0Sstevel@tonic-gate  *	scrub an extended area with cpu_check_block.  The call for
5762*0Sstevel@tonic-gate  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5763*0Sstevel@tonic-gate  *	brief for this case.
5764*0Sstevel@tonic-gate  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5765*0Sstevel@tonic-gate  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5766*0Sstevel@tonic-gate  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5767*0Sstevel@tonic-gate  */
5768*0Sstevel@tonic-gate void
5769*0Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5770*0Sstevel@tonic-gate {
5771*0Sstevel@tonic-gate 	ch_cpu_errors_t	cpu_error_regs;
5772*0Sstevel@tonic-gate 	uint64_t	ec_err_enable;
5773*0Sstevel@tonic-gate 	uint64_t	page_offset;
5774*0Sstevel@tonic-gate 
5775*0Sstevel@tonic-gate 	/* Read AFSR */
5776*0Sstevel@tonic-gate 	get_cpu_error_state(&cpu_error_regs);
5777*0Sstevel@tonic-gate 
5778*0Sstevel@tonic-gate 	/*
5779*0Sstevel@tonic-gate 	 * If no CEEN errors have occurred during the timeout
5780*0Sstevel@tonic-gate 	 * interval, it is safe to re-enable CEEN and exit.
5781*0Sstevel@tonic-gate 	 */
5782*0Sstevel@tonic-gate 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5783*0Sstevel@tonic-gate 		if (flag == TIMEOUT_CEEN_CHECK &&
5784*0Sstevel@tonic-gate 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5785*0Sstevel@tonic-gate 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5786*0Sstevel@tonic-gate 		return;
5787*0Sstevel@tonic-gate 	}
5788*0Sstevel@tonic-gate 
5789*0Sstevel@tonic-gate 	/*
5790*0Sstevel@tonic-gate 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5791*0Sstevel@tonic-gate 	 * we log/clear the error.
5792*0Sstevel@tonic-gate 	 */
5793*0Sstevel@tonic-gate 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5794*0Sstevel@tonic-gate 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5795*0Sstevel@tonic-gate 
5796*0Sstevel@tonic-gate 	/*
5797*0Sstevel@tonic-gate 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5798*0Sstevel@tonic-gate 	 * timeout will be rescheduled when the error is logged.
5799*0Sstevel@tonic-gate 	 */
5800*0Sstevel@tonic-gate 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5801*0Sstevel@tonic-gate 	    cpu_ce_detected(&cpu_error_regs,
5802*0Sstevel@tonic-gate 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5803*0Sstevel@tonic-gate 	else
5804*0Sstevel@tonic-gate 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5805*0Sstevel@tonic-gate 
5806*0Sstevel@tonic-gate 	/*
5807*0Sstevel@tonic-gate 	 * If the memory scrubber runs while CEEN is
5808*0Sstevel@tonic-gate 	 * disabled, (or if CEEN is disabled during the
5809*0Sstevel@tonic-gate 	 * scrub as a result of a CE being triggered by
5810*0Sstevel@tonic-gate 	 * it), the range being scrubbed will not be
5811*0Sstevel@tonic-gate 	 * completely cleaned. If there are multiple CEs
5812*0Sstevel@tonic-gate 	 * in the range at most two of these will be dealt
5813*0Sstevel@tonic-gate 	 * with, (one by the trap handler and one by the
5814*0Sstevel@tonic-gate 	 * timeout). It is also possible that none are dealt
5815*0Sstevel@tonic-gate 	 * with, (CEEN disabled and another CE occurs before
5816*0Sstevel@tonic-gate 	 * the timeout triggers). So to ensure that the
5817*0Sstevel@tonic-gate 	 * memory is actually scrubbed, we have to access each
5818*0Sstevel@tonic-gate 	 * memory location in the range and then check whether
5819*0Sstevel@tonic-gate 	 * that access causes a CE.
5820*0Sstevel@tonic-gate 	 */
5821*0Sstevel@tonic-gate 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5822*0Sstevel@tonic-gate 		if ((cpu_error_regs.afar >= pa) &&
5823*0Sstevel@tonic-gate 		    (cpu_error_regs.afar < (pa + psz))) {
5824*0Sstevel@tonic-gate 			/*
5825*0Sstevel@tonic-gate 			 * Force a load from physical memory for each
5826*0Sstevel@tonic-gate 			 * 64-byte block, then check AFSR to determine
5827*0Sstevel@tonic-gate 			 * whether this access caused an error.
5828*0Sstevel@tonic-gate 			 *
5829*0Sstevel@tonic-gate 			 * This is a slow way to do a scrub, but as it will
5830*0Sstevel@tonic-gate 			 * only be invoked when the memory scrubber actually
5831*0Sstevel@tonic-gate 			 * triggered a CE, it should not happen too
5832*0Sstevel@tonic-gate 			 * frequently.
5833*0Sstevel@tonic-gate 			 *
5834*0Sstevel@tonic-gate 			 * cut down what we need to check as the scrubber
5835*0Sstevel@tonic-gate 			 * has verified up to AFAR, so get it's offset
5836*0Sstevel@tonic-gate 			 * into the page and start there.
5837*0Sstevel@tonic-gate 			 */
5838*0Sstevel@tonic-gate 			page_offset = (uint64_t)(cpu_error_regs.afar &
5839*0Sstevel@tonic-gate 			    (psz - 1));
5840*0Sstevel@tonic-gate 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5841*0Sstevel@tonic-gate 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5842*0Sstevel@tonic-gate 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5843*0Sstevel@tonic-gate 			    psz);
5844*0Sstevel@tonic-gate 		}
5845*0Sstevel@tonic-gate 	}
5846*0Sstevel@tonic-gate 
5847*0Sstevel@tonic-gate 	/*
5848*0Sstevel@tonic-gate 	 * Reset error enable if this CE is not masked.
5849*0Sstevel@tonic-gate 	 */
5850*0Sstevel@tonic-gate 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5851*0Sstevel@tonic-gate 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5852*0Sstevel@tonic-gate 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5853*0Sstevel@tonic-gate 
5854*0Sstevel@tonic-gate }
5855*0Sstevel@tonic-gate 
5856*0Sstevel@tonic-gate /*
5857*0Sstevel@tonic-gate  * Attempt a cpu logout for an error that we did not trap for, such
5858*0Sstevel@tonic-gate  * as a CE noticed with CEEN off.  It is assumed that we are still running
5859*0Sstevel@tonic-gate  * on the cpu that took the error and that we cannot migrate.  Returns
5860*0Sstevel@tonic-gate  * 0 on success, otherwise nonzero.
5861*0Sstevel@tonic-gate  */
5862*0Sstevel@tonic-gate static int
5863*0Sstevel@tonic-gate cpu_ce_delayed_ec_logout(uint64_t afar)
5864*0Sstevel@tonic-gate {
5865*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
5866*0Sstevel@tonic-gate 
5867*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL)
5868*0Sstevel@tonic-gate 		return (0);
5869*0Sstevel@tonic-gate 
5870*0Sstevel@tonic-gate 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5871*0Sstevel@tonic-gate 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5872*0Sstevel@tonic-gate 	    LOGOUT_INVALID)
5873*0Sstevel@tonic-gate 		return (0);
5874*0Sstevel@tonic-gate 
5875*0Sstevel@tonic-gate 	cpu_delayed_logout(afar, clop);
5876*0Sstevel@tonic-gate 	return (1);
5877*0Sstevel@tonic-gate }
5878*0Sstevel@tonic-gate 
5879*0Sstevel@tonic-gate /*
5880*0Sstevel@tonic-gate  * We got an error while CEEN was disabled. We
5881*0Sstevel@tonic-gate  * need to clean up after it and log whatever
5882*0Sstevel@tonic-gate  * information we have on the CE.
5883*0Sstevel@tonic-gate  */
5884*0Sstevel@tonic-gate void
5885*0Sstevel@tonic-gate cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5886*0Sstevel@tonic-gate {
5887*0Sstevel@tonic-gate 	ch_async_flt_t 	ch_flt;
5888*0Sstevel@tonic-gate 	struct async_flt *aflt;
5889*0Sstevel@tonic-gate 	char 		pr_reason[MAX_REASON_STRING];
5890*0Sstevel@tonic-gate 
5891*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5892*0Sstevel@tonic-gate 	ch_flt.flt_trapped_ce = flag;
5893*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&ch_flt;
5894*0Sstevel@tonic-gate 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5895*0Sstevel@tonic-gate 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5896*0Sstevel@tonic-gate 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5897*0Sstevel@tonic-gate 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5898*0Sstevel@tonic-gate 	aflt->flt_addr = cpu_error_regs->afar;
5899*0Sstevel@tonic-gate #if defined(SERRANO)
5900*0Sstevel@tonic-gate 	ch_flt.afar2 = cpu_error_regs->afar2;
5901*0Sstevel@tonic-gate #endif	/* SERRANO */
5902*0Sstevel@tonic-gate 	aflt->flt_pc = NULL;
5903*0Sstevel@tonic-gate 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5904*0Sstevel@tonic-gate 	aflt->flt_tl = 0;
5905*0Sstevel@tonic-gate 	aflt->flt_panic = 0;
5906*0Sstevel@tonic-gate 	cpu_log_and_clear_ce(&ch_flt);
5907*0Sstevel@tonic-gate 
5908*0Sstevel@tonic-gate 	/*
5909*0Sstevel@tonic-gate 	 * check if we caused any errors during cleanup
5910*0Sstevel@tonic-gate 	 */
5911*0Sstevel@tonic-gate 	if (clear_errors(&ch_flt)) {
5912*0Sstevel@tonic-gate 		pr_reason[0] = '\0';
5913*0Sstevel@tonic-gate 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5914*0Sstevel@tonic-gate 		    NULL);
5915*0Sstevel@tonic-gate 	}
5916*0Sstevel@tonic-gate }
5917*0Sstevel@tonic-gate 
5918*0Sstevel@tonic-gate /*
5919*0Sstevel@tonic-gate  * Log/clear CEEN-controlled disrupting errors
5920*0Sstevel@tonic-gate  */
5921*0Sstevel@tonic-gate static void
5922*0Sstevel@tonic-gate cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
5923*0Sstevel@tonic-gate {
5924*0Sstevel@tonic-gate 	struct async_flt *aflt;
5925*0Sstevel@tonic-gate 	uint64_t afsr, afsr_errs;
5926*0Sstevel@tonic-gate 	ch_cpu_logout_t *clop;
5927*0Sstevel@tonic-gate 	char 		pr_reason[MAX_REASON_STRING];
5928*0Sstevel@tonic-gate 	on_trap_data_t	*otp = curthread->t_ontrap;
5929*0Sstevel@tonic-gate 
5930*0Sstevel@tonic-gate 	aflt = (struct async_flt *)ch_flt;
5931*0Sstevel@tonic-gate 	afsr = aflt->flt_stat;
5932*0Sstevel@tonic-gate 	afsr_errs = ch_flt->afsr_errs;
5933*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
5934*0Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
5935*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
5936*0Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
5937*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
5938*0Sstevel@tonic-gate 	aflt->flt_status = ECC_C_TRAP;
5939*0Sstevel@tonic-gate 
5940*0Sstevel@tonic-gate 	pr_reason[0] = '\0';
5941*0Sstevel@tonic-gate 	/*
5942*0Sstevel@tonic-gate 	 * Get the CPU log out info for Disrupting Trap.
5943*0Sstevel@tonic-gate 	 */
5944*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) == NULL) {
5945*0Sstevel@tonic-gate 		clop = NULL;
5946*0Sstevel@tonic-gate 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
5947*0Sstevel@tonic-gate 	} else {
5948*0Sstevel@tonic-gate 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5949*0Sstevel@tonic-gate 	}
5950*0Sstevel@tonic-gate 
5951*0Sstevel@tonic-gate 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
5952*0Sstevel@tonic-gate 		ch_cpu_errors_t cpu_error_regs;
5953*0Sstevel@tonic-gate 
5954*0Sstevel@tonic-gate 		get_cpu_error_state(&cpu_error_regs);
5955*0Sstevel@tonic-gate 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
5956*0Sstevel@tonic-gate 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
5957*0Sstevel@tonic-gate 		clop->clo_data.chd_afar = cpu_error_regs.afar;
5958*0Sstevel@tonic-gate 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
5959*0Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
5960*0Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
5961*0Sstevel@tonic-gate 		clop->clo_sdw_data.chd_afsr_ext =
5962*0Sstevel@tonic-gate 		    cpu_error_regs.shadow_afsr_ext;
5963*0Sstevel@tonic-gate #if defined(SERRANO)
5964*0Sstevel@tonic-gate 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
5965*0Sstevel@tonic-gate #endif	/* SERRANO */
5966*0Sstevel@tonic-gate 		ch_flt->flt_data_incomplete = 1;
5967*0Sstevel@tonic-gate 
5968*0Sstevel@tonic-gate 		/*
5969*0Sstevel@tonic-gate 		 * The logging/clear code expects AFSR/AFAR to be cleared.
5970*0Sstevel@tonic-gate 		 * The trap handler does it for CEEN enabled errors
5971*0Sstevel@tonic-gate 		 * so we need to do it here.
5972*0Sstevel@tonic-gate 		 */
5973*0Sstevel@tonic-gate 		set_cpu_error_state(&cpu_error_regs);
5974*0Sstevel@tonic-gate 	}
5975*0Sstevel@tonic-gate 
5976*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
5977*0Sstevel@tonic-gate 	/*
5978*0Sstevel@tonic-gate 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
5979*0Sstevel@tonic-gate 	 * For Serrano, even thou we do have the AFAR, we still do the
5980*0Sstevel@tonic-gate 	 * scrub on the RCE side since that's where the error type can
5981*0Sstevel@tonic-gate 	 * be properly classified as intermittent, persistent, etc.
5982*0Sstevel@tonic-gate 	 *
5983*0Sstevel@tonic-gate 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
5984*0Sstevel@tonic-gate 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5985*0Sstevel@tonic-gate 	 * the flt_status bits.
5986*0Sstevel@tonic-gate 	 */
5987*0Sstevel@tonic-gate 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
5988*0Sstevel@tonic-gate 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5989*0Sstevel@tonic-gate 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
5990*0Sstevel@tonic-gate 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
5991*0Sstevel@tonic-gate 	}
5992*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
5993*0Sstevel@tonic-gate 	/*
5994*0Sstevel@tonic-gate 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
5995*0Sstevel@tonic-gate 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5996*0Sstevel@tonic-gate 	 * the flt_status bits.
5997*0Sstevel@tonic-gate 	 */
5998*0Sstevel@tonic-gate 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
5999*0Sstevel@tonic-gate 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6000*0Sstevel@tonic-gate 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6001*0Sstevel@tonic-gate 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6002*0Sstevel@tonic-gate 		}
6003*0Sstevel@tonic-gate 	}
6004*0Sstevel@tonic-gate 
6005*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
6006*0Sstevel@tonic-gate 
6007*0Sstevel@tonic-gate 	/*
6008*0Sstevel@tonic-gate 	 * Update flt_prot if this error occurred under on_trap protection.
6009*0Sstevel@tonic-gate 	 */
6010*0Sstevel@tonic-gate 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6011*0Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_EC;
6012*0Sstevel@tonic-gate 
6013*0Sstevel@tonic-gate 	/*
6014*0Sstevel@tonic-gate 	 * Queue events on the async event queue, one event per error bit.
6015*0Sstevel@tonic-gate 	 */
6016*0Sstevel@tonic-gate 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6017*0Sstevel@tonic-gate 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6018*0Sstevel@tonic-gate 		ch_flt->flt_type = CPU_INV_AFSR;
6019*0Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6020*0Sstevel@tonic-gate 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6021*0Sstevel@tonic-gate 		    aflt->flt_panic);
6022*0Sstevel@tonic-gate 	}
6023*0Sstevel@tonic-gate 
6024*0Sstevel@tonic-gate 	/*
6025*0Sstevel@tonic-gate 	 * Zero out + invalidate CPU logout.
6026*0Sstevel@tonic-gate 	 */
6027*0Sstevel@tonic-gate 	if (clop) {
6028*0Sstevel@tonic-gate 		bzero(clop, sizeof (ch_cpu_logout_t));
6029*0Sstevel@tonic-gate 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6030*0Sstevel@tonic-gate 	}
6031*0Sstevel@tonic-gate 
6032*0Sstevel@tonic-gate 	/*
6033*0Sstevel@tonic-gate 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6034*0Sstevel@tonic-gate 	 * was disabled, we need to flush either the entire
6035*0Sstevel@tonic-gate 	 * E$ or an E$ line.
6036*0Sstevel@tonic-gate 	 */
6037*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
6038*0Sstevel@tonic-gate 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6039*0Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
6040*0Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6041*0Sstevel@tonic-gate 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6042*0Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
6043*0Sstevel@tonic-gate 		cpu_error_ecache_flush(ch_flt);
6044*0Sstevel@tonic-gate 
6045*0Sstevel@tonic-gate }
6046*0Sstevel@tonic-gate 
6047*0Sstevel@tonic-gate /*
6048*0Sstevel@tonic-gate  * depending on the error type, we determine whether we
6049*0Sstevel@tonic-gate  * need to flush the entire ecache or just a line.
6050*0Sstevel@tonic-gate  */
6051*0Sstevel@tonic-gate static int
6052*0Sstevel@tonic-gate cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6053*0Sstevel@tonic-gate {
6054*0Sstevel@tonic-gate 	struct async_flt *aflt;
6055*0Sstevel@tonic-gate 	uint64_t	afsr;
6056*0Sstevel@tonic-gate 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6057*0Sstevel@tonic-gate 
6058*0Sstevel@tonic-gate 	aflt = (struct async_flt *)ch_flt;
6059*0Sstevel@tonic-gate 	afsr = aflt->flt_stat;
6060*0Sstevel@tonic-gate 
6061*0Sstevel@tonic-gate 	/*
6062*0Sstevel@tonic-gate 	 * If we got multiple errors, no point in trying
6063*0Sstevel@tonic-gate 	 * the individual cases, just flush the whole cache
6064*0Sstevel@tonic-gate 	 */
6065*0Sstevel@tonic-gate 	if (afsr & C_AFSR_ME) {
6066*0Sstevel@tonic-gate 		return (ECACHE_FLUSH_ALL);
6067*0Sstevel@tonic-gate 	}
6068*0Sstevel@tonic-gate 
6069*0Sstevel@tonic-gate 	/*
6070*0Sstevel@tonic-gate 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6071*0Sstevel@tonic-gate 	 * was disabled, we need to flush entire E$. We can't just
6072*0Sstevel@tonic-gate 	 * flush the cache line affected as the ME bit
6073*0Sstevel@tonic-gate 	 * is not set when multiple correctable errors of the same
6074*0Sstevel@tonic-gate 	 * type occur, so we might have multiple CPC or EDC errors,
6075*0Sstevel@tonic-gate 	 * with only the first recorded.
6076*0Sstevel@tonic-gate 	 */
6077*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
6078*0Sstevel@tonic-gate 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6079*0Sstevel@tonic-gate #else	/* JALAPENO || SERRANO */
6080*0Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6081*0Sstevel@tonic-gate 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6082*0Sstevel@tonic-gate #endif	/* JALAPENO || SERRANO */
6083*0Sstevel@tonic-gate 		return (ECACHE_FLUSH_ALL);
6084*0Sstevel@tonic-gate 	}
6085*0Sstevel@tonic-gate 
6086*0Sstevel@tonic-gate #if defined(JALAPENO) || defined(SERRANO)
6087*0Sstevel@tonic-gate 	/*
6088*0Sstevel@tonic-gate 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6089*0Sstevel@tonic-gate 	 * flush the entire Ecache.
6090*0Sstevel@tonic-gate 	 */
6091*0Sstevel@tonic-gate 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6092*0Sstevel@tonic-gate 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6093*0Sstevel@tonic-gate 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6094*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
6095*0Sstevel@tonic-gate 		} else {
6096*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
6097*0Sstevel@tonic-gate 		}
6098*0Sstevel@tonic-gate 	}
6099*0Sstevel@tonic-gate #else /* JALAPENO || SERRANO */
6100*0Sstevel@tonic-gate 	/*
6101*0Sstevel@tonic-gate 	 * If UE only is set, flush the Ecache line, otherwise
6102*0Sstevel@tonic-gate 	 * flush the entire Ecache.
6103*0Sstevel@tonic-gate 	 */
6104*0Sstevel@tonic-gate 	if (afsr_errs & C_AFSR_UE) {
6105*0Sstevel@tonic-gate 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6106*0Sstevel@tonic-gate 		    C_AFSR_UE) {
6107*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
6108*0Sstevel@tonic-gate 		} else {
6109*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
6110*0Sstevel@tonic-gate 		}
6111*0Sstevel@tonic-gate 	}
6112*0Sstevel@tonic-gate #endif /* JALAPENO || SERRANO */
6113*0Sstevel@tonic-gate 
6114*0Sstevel@tonic-gate 	/*
6115*0Sstevel@tonic-gate 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6116*0Sstevel@tonic-gate 	 * flush the entire Ecache.
6117*0Sstevel@tonic-gate 	 */
6118*0Sstevel@tonic-gate 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6119*0Sstevel@tonic-gate 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6120*0Sstevel@tonic-gate 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6121*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
6122*0Sstevel@tonic-gate 		} else {
6123*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
6124*0Sstevel@tonic-gate 		}
6125*0Sstevel@tonic-gate 	}
6126*0Sstevel@tonic-gate 
6127*0Sstevel@tonic-gate 	/*
6128*0Sstevel@tonic-gate 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6129*0Sstevel@tonic-gate 	 * flush the entire Ecache.
6130*0Sstevel@tonic-gate 	 */
6131*0Sstevel@tonic-gate 	if (afsr_errs & C_AFSR_BERR) {
6132*0Sstevel@tonic-gate 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6133*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_LINE);
6134*0Sstevel@tonic-gate 		} else {
6135*0Sstevel@tonic-gate 			return (ECACHE_FLUSH_ALL);
6136*0Sstevel@tonic-gate 		}
6137*0Sstevel@tonic-gate 	}
6138*0Sstevel@tonic-gate 
6139*0Sstevel@tonic-gate 	return (0);
6140*0Sstevel@tonic-gate }
6141*0Sstevel@tonic-gate 
6142*0Sstevel@tonic-gate void
6143*0Sstevel@tonic-gate cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6144*0Sstevel@tonic-gate {
6145*0Sstevel@tonic-gate 	int	ecache_flush_flag =
6146*0Sstevel@tonic-gate 	    cpu_error_ecache_flush_required(ch_flt);
6147*0Sstevel@tonic-gate 
6148*0Sstevel@tonic-gate 	/*
6149*0Sstevel@tonic-gate 	 * Flush Ecache line or entire Ecache based on above checks.
6150*0Sstevel@tonic-gate 	 */
6151*0Sstevel@tonic-gate 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6152*0Sstevel@tonic-gate 		cpu_flush_ecache();
6153*0Sstevel@tonic-gate 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6154*0Sstevel@tonic-gate 		cpu_flush_ecache_line(ch_flt);
6155*0Sstevel@tonic-gate 	}
6156*0Sstevel@tonic-gate 
6157*0Sstevel@tonic-gate }
6158*0Sstevel@tonic-gate 
6159*0Sstevel@tonic-gate /*
6160*0Sstevel@tonic-gate  * Extract the PA portion from the E$ tag.
6161*0Sstevel@tonic-gate  */
6162*0Sstevel@tonic-gate uint64_t
6163*0Sstevel@tonic-gate cpu_ectag_to_pa(int setsize, uint64_t tag)
6164*0Sstevel@tonic-gate {
6165*0Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6166*0Sstevel@tonic-gate 		return (JG_ECTAG_TO_PA(setsize, tag));
6167*0Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6168*0Sstevel@tonic-gate 		return (PN_L3TAG_TO_PA(tag));
6169*0Sstevel@tonic-gate 	else
6170*0Sstevel@tonic-gate 		return (CH_ECTAG_TO_PA(setsize, tag));
6171*0Sstevel@tonic-gate }
6172*0Sstevel@tonic-gate 
6173*0Sstevel@tonic-gate /*
6174*0Sstevel@tonic-gate  * Convert the E$ tag PA into an E$ subblock index.
6175*0Sstevel@tonic-gate  */
6176*0Sstevel@tonic-gate static int
6177*0Sstevel@tonic-gate cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6178*0Sstevel@tonic-gate {
6179*0Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6180*0Sstevel@tonic-gate 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6181*0Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6182*0Sstevel@tonic-gate 		/* Panther has only one subblock per line */
6183*0Sstevel@tonic-gate 		return (0);
6184*0Sstevel@tonic-gate 	else
6185*0Sstevel@tonic-gate 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6186*0Sstevel@tonic-gate }
6187*0Sstevel@tonic-gate 
6188*0Sstevel@tonic-gate /*
6189*0Sstevel@tonic-gate  * All subblocks in an E$ line must be invalid for
6190*0Sstevel@tonic-gate  * the line to be invalid.
6191*0Sstevel@tonic-gate  */
6192*0Sstevel@tonic-gate int
6193*0Sstevel@tonic-gate cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6194*0Sstevel@tonic-gate {
6195*0Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6196*0Sstevel@tonic-gate 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6197*0Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6198*0Sstevel@tonic-gate 		return (PN_L3_LINE_INVALID(tag));
6199*0Sstevel@tonic-gate 	else
6200*0Sstevel@tonic-gate 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6201*0Sstevel@tonic-gate }
6202*0Sstevel@tonic-gate 
6203*0Sstevel@tonic-gate /*
6204*0Sstevel@tonic-gate  * Extract state bits for a subblock given the tag.  Note that for Panther
6205*0Sstevel@tonic-gate  * this works on both l2 and l3 tags.
6206*0Sstevel@tonic-gate  */
6207*0Sstevel@tonic-gate static int
6208*0Sstevel@tonic-gate cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6209*0Sstevel@tonic-gate {
6210*0Sstevel@tonic-gate 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6211*0Sstevel@tonic-gate 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6212*0Sstevel@tonic-gate 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6213*0Sstevel@tonic-gate 		return (tag & CH_ECSTATE_MASK);
6214*0Sstevel@tonic-gate 	else
6215*0Sstevel@tonic-gate 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6216*0Sstevel@tonic-gate }
6217*0Sstevel@tonic-gate 
/*
 * CPU-module specific MP initialization.  The only work done here is
 * setting up the nudge mechanism, and only when the module is built with
 * CHEETAHPLUS_ERRATUM_25 and cheetah_sendmondo_recover is set.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6230*0Sstevel@tonic-gate 
6231*0Sstevel@tonic-gate void
6232*0Sstevel@tonic-gate cpu_ereport_post(struct async_flt *aflt)
6233*0Sstevel@tonic-gate {
6234*0Sstevel@tonic-gate 	char *cpu_type, buf[FM_MAX_CLASS];
6235*0Sstevel@tonic-gate 	nv_alloc_t *nva = NULL;
6236*0Sstevel@tonic-gate 	nvlist_t *ereport, *detector, *resource;
6237*0Sstevel@tonic-gate 	errorq_elem_t *eqep;
6238*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6239*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
6240*0Sstevel@tonic-gate 	int len = 0;
6241*0Sstevel@tonic-gate 	uint8_t  msg_type;
6242*0Sstevel@tonic-gate 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6243*0Sstevel@tonic-gate 
6244*0Sstevel@tonic-gate 	if (aflt->flt_panic || panicstr) {
6245*0Sstevel@tonic-gate 		eqep = errorq_reserve(ereport_errorq);
6246*0Sstevel@tonic-gate 		if (eqep == NULL)
6247*0Sstevel@tonic-gate 			return;
6248*0Sstevel@tonic-gate 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6249*0Sstevel@tonic-gate 		nva = errorq_elem_nva(ereport_errorq, eqep);
6250*0Sstevel@tonic-gate 	} else {
6251*0Sstevel@tonic-gate 		ereport = fm_nvlist_create(nva);
6252*0Sstevel@tonic-gate 	}
6253*0Sstevel@tonic-gate 
6254*0Sstevel@tonic-gate 	/*
6255*0Sstevel@tonic-gate 	 * Create the scheme "cpu" FMRI.
6256*0Sstevel@tonic-gate 	 */
6257*0Sstevel@tonic-gate 	detector = fm_nvlist_create(nva);
6258*0Sstevel@tonic-gate 	resource = fm_nvlist_create(nva);
6259*0Sstevel@tonic-gate 	switch (cpunodes[aflt->flt_inst].implementation) {
6260*0Sstevel@tonic-gate 	case CHEETAH_IMPL:
6261*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIII;
6262*0Sstevel@tonic-gate 		break;
6263*0Sstevel@tonic-gate 	case CHEETAH_PLUS_IMPL:
6264*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6265*0Sstevel@tonic-gate 		break;
6266*0Sstevel@tonic-gate 	case JALAPENO_IMPL:
6267*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIi;
6268*0Sstevel@tonic-gate 		break;
6269*0Sstevel@tonic-gate 	case SERRANO_IMPL:
6270*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6271*0Sstevel@tonic-gate 		break;
6272*0Sstevel@tonic-gate 	case JAGUAR_IMPL:
6273*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIV;
6274*0Sstevel@tonic-gate 		break;
6275*0Sstevel@tonic-gate 	case PANTHER_IMPL:
6276*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_USIVplus;
6277*0Sstevel@tonic-gate 		break;
6278*0Sstevel@tonic-gate 	default:
6279*0Sstevel@tonic-gate 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6280*0Sstevel@tonic-gate 		break;
6281*0Sstevel@tonic-gate 	}
6282*0Sstevel@tonic-gate 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6283*0Sstevel@tonic-gate 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6284*0Sstevel@tonic-gate 	    cpunodes[aflt->flt_inst].device_id);
6285*0Sstevel@tonic-gate 
6286*0Sstevel@tonic-gate 	/*
6287*0Sstevel@tonic-gate 	 * Encode all the common data into the ereport.
6288*0Sstevel@tonic-gate 	 */
6289*0Sstevel@tonic-gate 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6290*0Sstevel@tonic-gate 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6291*0Sstevel@tonic-gate 
6292*0Sstevel@tonic-gate 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6293*0Sstevel@tonic-gate 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6294*0Sstevel@tonic-gate 	    detector, NULL);
6295*0Sstevel@tonic-gate 
6296*0Sstevel@tonic-gate 	/*
6297*0Sstevel@tonic-gate 	 * Encode the error specific data that was saved in
6298*0Sstevel@tonic-gate 	 * the async_flt structure into the ereport.
6299*0Sstevel@tonic-gate 	 */
6300*0Sstevel@tonic-gate 	cpu_payload_add_aflt(aflt, ereport, resource,
6301*0Sstevel@tonic-gate 	    &plat_ecc_ch_flt.ecaf_afar_status,
6302*0Sstevel@tonic-gate 	    &plat_ecc_ch_flt.ecaf_synd_status);
6303*0Sstevel@tonic-gate 
6304*0Sstevel@tonic-gate 	if (aflt->flt_panic || panicstr) {
6305*0Sstevel@tonic-gate 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6306*0Sstevel@tonic-gate 	} else {
6307*0Sstevel@tonic-gate 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6308*0Sstevel@tonic-gate 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6309*0Sstevel@tonic-gate 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6310*0Sstevel@tonic-gate 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6311*0Sstevel@tonic-gate 	}
6312*0Sstevel@tonic-gate 	/*
6313*0Sstevel@tonic-gate 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6314*0Sstevel@tonic-gate 	 * to the SC olny if it can process it.
6315*0Sstevel@tonic-gate 	 */
6316*0Sstevel@tonic-gate 
6317*0Sstevel@tonic-gate 	if (&plat_ecc_capability_sc_get &&
6318*0Sstevel@tonic-gate 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6319*0Sstevel@tonic-gate 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6320*0Sstevel@tonic-gate 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6321*0Sstevel@tonic-gate 			/*
6322*0Sstevel@tonic-gate 			 * If afar status is not invalid do a unum lookup.
6323*0Sstevel@tonic-gate 			 */
6324*0Sstevel@tonic-gate 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6325*0Sstevel@tonic-gate 			    AFLT_STAT_INVALID) {
6326*0Sstevel@tonic-gate 				(void) cpu_get_mem_unum_aflt(
6327*0Sstevel@tonic-gate 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6328*0Sstevel@tonic-gate 				    unum, UNUM_NAMLEN, &len);
6329*0Sstevel@tonic-gate 			} else {
6330*0Sstevel@tonic-gate 				unum[0] = '\0';
6331*0Sstevel@tonic-gate 			}
6332*0Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6333*0Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6334*0Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6335*0Sstevel@tonic-gate 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6336*0Sstevel@tonic-gate 			    ch_flt->flt_sdw_afsr_ext;
6337*0Sstevel@tonic-gate 
6338*0Sstevel@tonic-gate 			if (&plat_log_fruid_error2)
6339*0Sstevel@tonic-gate 				plat_log_fruid_error2(msg_type, unum, aflt,
6340*0Sstevel@tonic-gate 				    &plat_ecc_ch_flt);
6341*0Sstevel@tonic-gate 		}
6342*0Sstevel@tonic-gate 	}
6343*0Sstevel@tonic-gate }
6344*0Sstevel@tonic-gate 
6345*0Sstevel@tonic-gate void
6346*0Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6347*0Sstevel@tonic-gate {
6348*0Sstevel@tonic-gate 	int status;
6349*0Sstevel@tonic-gate 	ddi_fm_error_t de;
6350*0Sstevel@tonic-gate 
6351*0Sstevel@tonic-gate 	bzero(&de, sizeof (ddi_fm_error_t));
6352*0Sstevel@tonic-gate 
6353*0Sstevel@tonic-gate 	de.fme_version = DDI_FME_VERSION;
6354*0Sstevel@tonic-gate 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6355*0Sstevel@tonic-gate 	    FM_ENA_FMT1);
6356*0Sstevel@tonic-gate 	de.fme_flag = expected;
6357*0Sstevel@tonic-gate 	de.fme_bus_specific = (void *)aflt->flt_addr;
6358*0Sstevel@tonic-gate 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6359*0Sstevel@tonic-gate 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6360*0Sstevel@tonic-gate 		aflt->flt_panic = 1;
6361*0Sstevel@tonic-gate }
6362*0Sstevel@tonic-gate 
6363*0Sstevel@tonic-gate void
6364*0Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6365*0Sstevel@tonic-gate     errorq_t *eqp, uint_t flag)
6366*0Sstevel@tonic-gate {
6367*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)payload;
6368*0Sstevel@tonic-gate 
6369*0Sstevel@tonic-gate 	aflt->flt_erpt_class = error_class;
6370*0Sstevel@tonic-gate 	errorq_dispatch(eqp, payload, payload_sz, flag);
6371*0Sstevel@tonic-gate }
6372*0Sstevel@tonic-gate 
/*
 * Deliberate no-op.  The IO module may call this routine, but this cpu
 * module has nothing to do here: the SERD algorithm is handled by the
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6382*0Sstevel@tonic-gate 
6383*0Sstevel@tonic-gate void
6384*0Sstevel@tonic-gate adjust_hw_copy_limits(int ecache_size)
6385*0Sstevel@tonic-gate {
6386*0Sstevel@tonic-gate 	/*
6387*0Sstevel@tonic-gate 	 * Set hw copy limits.
6388*0Sstevel@tonic-gate 	 *
6389*0Sstevel@tonic-gate 	 * /etc/system will be parsed later and can override one or more
6390*0Sstevel@tonic-gate 	 * of these settings.
6391*0Sstevel@tonic-gate 	 *
6392*0Sstevel@tonic-gate 	 * At this time, ecache size seems only mildly relevant.
6393*0Sstevel@tonic-gate 	 * We seem to run into issues with the d-cache and stalls
6394*0Sstevel@tonic-gate 	 * we see on misses.
6395*0Sstevel@tonic-gate 	 *
6396*0Sstevel@tonic-gate 	 * Cycle measurement indicates that 2 byte aligned copies fare
6397*0Sstevel@tonic-gate 	 * little better than doing things with VIS at around 512 bytes.
6398*0Sstevel@tonic-gate 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6399*0Sstevel@tonic-gate 	 * aligned is faster whenever the source and destination data
6400*0Sstevel@tonic-gate 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6401*0Sstevel@tonic-gate 	 * limit seems to be driven by the 2K write cache.
6402*0Sstevel@tonic-gate 	 * When more than 2K of copies are done in non-VIS mode, stores
6403*0Sstevel@tonic-gate 	 * backup in the write cache.  In VIS mode, the write cache is
6404*0Sstevel@tonic-gate 	 * bypassed, allowing faster cache-line writes aligned on cache
6405*0Sstevel@tonic-gate 	 * boundaries.
6406*0Sstevel@tonic-gate 	 *
6407*0Sstevel@tonic-gate 	 * In addition, in non-VIS mode, there is no prefetching, so
6408*0Sstevel@tonic-gate 	 * for larger copies, the advantage of prefetching to avoid even
6409*0Sstevel@tonic-gate 	 * occasional cache misses is enough to justify using the VIS code.
6410*0Sstevel@tonic-gate 	 *
6411*0Sstevel@tonic-gate 	 * During testing, it was discovered that netbench ran 3% slower
6412*0Sstevel@tonic-gate 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6413*0Sstevel@tonic-gate 	 * applications, data is only used once (copied to the output
6414*0Sstevel@tonic-gate 	 * buffer, then copied by the network device off the system).  Using
6415*0Sstevel@tonic-gate 	 * the VIS copy saves more L2 cache state.  Network copies are
6416*0Sstevel@tonic-gate 	 * around 1.3K to 1.5K in size for historical reasons.
6417*0Sstevel@tonic-gate 	 *
6418*0Sstevel@tonic-gate 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6419*0Sstevel@tonic-gate 	 * aligned copy even for large caches and 8 MB ecache.  The
6420*0Sstevel@tonic-gate 	 * infrastructure to allow different limits for different sized
6421*0Sstevel@tonic-gate 	 * caches is kept to allow further tuning in later releases.
6422*0Sstevel@tonic-gate 	 */
6423*0Sstevel@tonic-gate 
6424*0Sstevel@tonic-gate 	if (min_ecache_size == 0 && use_hw_bcopy) {
6425*0Sstevel@tonic-gate 		/*
6426*0Sstevel@tonic-gate 		 * First time through - should be before /etc/system
6427*0Sstevel@tonic-gate 		 * is read.
6428*0Sstevel@tonic-gate 		 * Could skip the checks for zero but this lets us
6429*0Sstevel@tonic-gate 		 * preserve any debugger rewrites.
6430*0Sstevel@tonic-gate 		 */
6431*0Sstevel@tonic-gate 		if (hw_copy_limit_1 == 0) {
6432*0Sstevel@tonic-gate 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6433*0Sstevel@tonic-gate 			priv_hcl_1 = hw_copy_limit_1;
6434*0Sstevel@tonic-gate 		}
6435*0Sstevel@tonic-gate 		if (hw_copy_limit_2 == 0) {
6436*0Sstevel@tonic-gate 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6437*0Sstevel@tonic-gate 			priv_hcl_2 = hw_copy_limit_2;
6438*0Sstevel@tonic-gate 		}
6439*0Sstevel@tonic-gate 		if (hw_copy_limit_4 == 0) {
6440*0Sstevel@tonic-gate 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6441*0Sstevel@tonic-gate 			priv_hcl_4 = hw_copy_limit_4;
6442*0Sstevel@tonic-gate 		}
6443*0Sstevel@tonic-gate 		if (hw_copy_limit_8 == 0) {
6444*0Sstevel@tonic-gate 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6445*0Sstevel@tonic-gate 			priv_hcl_8 = hw_copy_limit_8;
6446*0Sstevel@tonic-gate 		}
6447*0Sstevel@tonic-gate 		min_ecache_size = ecache_size;
6448*0Sstevel@tonic-gate 	} else {
6449*0Sstevel@tonic-gate 		/*
6450*0Sstevel@tonic-gate 		 * MP initialization. Called *after* /etc/system has
6451*0Sstevel@tonic-gate 		 * been parsed. One CPU has already been initialized.
6452*0Sstevel@tonic-gate 		 * Need to cater for /etc/system having scragged one
6453*0Sstevel@tonic-gate 		 * of our values.
6454*0Sstevel@tonic-gate 		 */
6455*0Sstevel@tonic-gate 		if (ecache_size == min_ecache_size) {
6456*0Sstevel@tonic-gate 			/*
6457*0Sstevel@tonic-gate 			 * Same size ecache. We do nothing unless we
6458*0Sstevel@tonic-gate 			 * have a pessimistic ecache setting. In that
6459*0Sstevel@tonic-gate 			 * case we become more optimistic (if the cache is
6460*0Sstevel@tonic-gate 			 * large enough).
6461*0Sstevel@tonic-gate 			 */
6462*0Sstevel@tonic-gate 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6463*0Sstevel@tonic-gate 				/*
6464*0Sstevel@tonic-gate 				 * Need to adjust hw_copy_limit* from our
6465*0Sstevel@tonic-gate 				 * pessimistic uniprocessor value to a more
6466*0Sstevel@tonic-gate 				 * optimistic UP value *iff* it hasn't been
6467*0Sstevel@tonic-gate 				 * reset.
6468*0Sstevel@tonic-gate 				 */
6469*0Sstevel@tonic-gate 				if ((ecache_size > 1048576) &&
6470*0Sstevel@tonic-gate 				    (priv_hcl_8 == hw_copy_limit_8)) {
6471*0Sstevel@tonic-gate 					if (ecache_size <= 2097152)
6472*0Sstevel@tonic-gate 						hw_copy_limit_8 = 4 *
6473*0Sstevel@tonic-gate 						    VIS_COPY_THRESHOLD;
6474*0Sstevel@tonic-gate 					else if (ecache_size <= 4194304)
6475*0Sstevel@tonic-gate 						hw_copy_limit_8 = 4 *
6476*0Sstevel@tonic-gate 						    VIS_COPY_THRESHOLD;
6477*0Sstevel@tonic-gate 					else
6478*0Sstevel@tonic-gate 						hw_copy_limit_8 = 4 *
6479*0Sstevel@tonic-gate 						    VIS_COPY_THRESHOLD;
6480*0Sstevel@tonic-gate 					priv_hcl_8 = hw_copy_limit_8;
6481*0Sstevel@tonic-gate 				}
6482*0Sstevel@tonic-gate 			}
6483*0Sstevel@tonic-gate 		} else if (ecache_size < min_ecache_size) {
6484*0Sstevel@tonic-gate 			/*
6485*0Sstevel@tonic-gate 			 * A different ecache size. Can this even happen?
6486*0Sstevel@tonic-gate 			 */
6487*0Sstevel@tonic-gate 			if (priv_hcl_8 == hw_copy_limit_8) {
6488*0Sstevel@tonic-gate 				/*
6489*0Sstevel@tonic-gate 				 * The previous value that we set
6490*0Sstevel@tonic-gate 				 * is unchanged (i.e., it hasn't been
6491*0Sstevel@tonic-gate 				 * scragged by /etc/system). Rewrite it.
6492*0Sstevel@tonic-gate 				 */
6493*0Sstevel@tonic-gate 				if (ecache_size <= 1048576)
6494*0Sstevel@tonic-gate 					hw_copy_limit_8 = 8 *
6495*0Sstevel@tonic-gate 					    VIS_COPY_THRESHOLD;
6496*0Sstevel@tonic-gate 				else if (ecache_size <= 2097152)
6497*0Sstevel@tonic-gate 					hw_copy_limit_8 = 8 *
6498*0Sstevel@tonic-gate 					    VIS_COPY_THRESHOLD;
6499*0Sstevel@tonic-gate 				else if (ecache_size <= 4194304)
6500*0Sstevel@tonic-gate 					hw_copy_limit_8 = 8 *
6501*0Sstevel@tonic-gate 					    VIS_COPY_THRESHOLD;
6502*0Sstevel@tonic-gate 				else
6503*0Sstevel@tonic-gate 					hw_copy_limit_8 = 10 *
6504*0Sstevel@tonic-gate 					    VIS_COPY_THRESHOLD;
6505*0Sstevel@tonic-gate 				priv_hcl_8 = hw_copy_limit_8;
6506*0Sstevel@tonic-gate 				min_ecache_size = ecache_size;
6507*0Sstevel@tonic-gate 			}
6508*0Sstevel@tonic-gate 		}
6509*0Sstevel@tonic-gate 	}
6510*0Sstevel@tonic-gate }
6511*0Sstevel@tonic-gate 
6512*0Sstevel@tonic-gate /*
6513*0Sstevel@tonic-gate  * Called from illegal instruction trap handler to see if we can attribute
6514*0Sstevel@tonic-gate  * the trap to a fpras check.
6515*0Sstevel@tonic-gate  */
6516*0Sstevel@tonic-gate int
6517*0Sstevel@tonic-gate fpras_chktrap(struct regs *rp)
6518*0Sstevel@tonic-gate {
6519*0Sstevel@tonic-gate 	int op;
6520*0Sstevel@tonic-gate 	struct fpras_chkfngrp *cgp;
6521*0Sstevel@tonic-gate 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6522*0Sstevel@tonic-gate 
6523*0Sstevel@tonic-gate 	if (fpras_chkfngrps == NULL)
6524*0Sstevel@tonic-gate 		return (0);
6525*0Sstevel@tonic-gate 
6526*0Sstevel@tonic-gate 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6527*0Sstevel@tonic-gate 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6528*0Sstevel@tonic-gate 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6529*0Sstevel@tonic-gate 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6530*0Sstevel@tonic-gate 			break;
6531*0Sstevel@tonic-gate 	}
6532*0Sstevel@tonic-gate 	if (op == FPRAS_NCOPYOPS)
6533*0Sstevel@tonic-gate 		return (0);
6534*0Sstevel@tonic-gate 
6535*0Sstevel@tonic-gate 	/*
6536*0Sstevel@tonic-gate 	 * This is an fpRAS failure caught through an illegal
6537*0Sstevel@tonic-gate 	 * instruction - trampoline.
6538*0Sstevel@tonic-gate 	 */
6539*0Sstevel@tonic-gate 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6540*0Sstevel@tonic-gate 	rp->r_npc = rp->r_pc + 4;
6541*0Sstevel@tonic-gate 	return (1);
6542*0Sstevel@tonic-gate }
6543*0Sstevel@tonic-gate 
6544*0Sstevel@tonic-gate /*
6545*0Sstevel@tonic-gate  * fpras_failure is called when a fpras check detects a bad calculation
6546*0Sstevel@tonic-gate  * result or an illegal instruction trap is attributed to an fpras
6547*0Sstevel@tonic-gate  * check.  In all cases we are still bound to CPU.
6548*0Sstevel@tonic-gate  */
6549*0Sstevel@tonic-gate int
6550*0Sstevel@tonic-gate fpras_failure(int op, int how)
6551*0Sstevel@tonic-gate {
6552*0Sstevel@tonic-gate 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6553*0Sstevel@tonic-gate 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6554*0Sstevel@tonic-gate 	ch_async_flt_t ch_flt;
6555*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6556*0Sstevel@tonic-gate 	struct fpras_chkfn *sfp, *cfp;
6557*0Sstevel@tonic-gate 	uint32_t *sip, *cip;
6558*0Sstevel@tonic-gate 	int i;
6559*0Sstevel@tonic-gate 
6560*0Sstevel@tonic-gate 	/*
6561*0Sstevel@tonic-gate 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6562*0Sstevel@tonic-gate 	 * the time in which we dispatch an ereport and (if applicable) panic.
6563*0Sstevel@tonic-gate 	 */
6564*0Sstevel@tonic-gate 	use_hw_bcopy_orig = use_hw_bcopy;
6565*0Sstevel@tonic-gate 	use_hw_bzero_orig = use_hw_bzero;
6566*0Sstevel@tonic-gate 	hcl1_orig = hw_copy_limit_1;
6567*0Sstevel@tonic-gate 	hcl2_orig = hw_copy_limit_2;
6568*0Sstevel@tonic-gate 	hcl4_orig = hw_copy_limit_4;
6569*0Sstevel@tonic-gate 	hcl8_orig = hw_copy_limit_8;
6570*0Sstevel@tonic-gate 	use_hw_bcopy = use_hw_bzero = 0;
6571*0Sstevel@tonic-gate 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6572*0Sstevel@tonic-gate 	    hw_copy_limit_8 = 0;
6573*0Sstevel@tonic-gate 
6574*0Sstevel@tonic-gate 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6575*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
6576*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
6577*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
6578*0Sstevel@tonic-gate 	aflt->flt_status = (how << 8) | op;
6579*0Sstevel@tonic-gate 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6580*0Sstevel@tonic-gate 	ch_flt.flt_type = CPU_FPUERR;
6581*0Sstevel@tonic-gate 
6582*0Sstevel@tonic-gate 	/*
6583*0Sstevel@tonic-gate 	 * We must panic if the copy operation had no lofault protection -
6584*0Sstevel@tonic-gate 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6585*0Sstevel@tonic-gate 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6586*0Sstevel@tonic-gate 	 */
6587*0Sstevel@tonic-gate 	aflt->flt_panic = (curthread->t_lofault == NULL);
6588*0Sstevel@tonic-gate 
6589*0Sstevel@tonic-gate 	/*
6590*0Sstevel@tonic-gate 	 * XOR the source instruction block with the copied instruction
6591*0Sstevel@tonic-gate 	 * block - this will show us which bit(s) are corrupted.
6592*0Sstevel@tonic-gate 	 */
6593*0Sstevel@tonic-gate 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6594*0Sstevel@tonic-gate 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6595*0Sstevel@tonic-gate 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6596*0Sstevel@tonic-gate 		sip = &sfp->fpras_blk0[0];
6597*0Sstevel@tonic-gate 		cip = &cfp->fpras_blk0[0];
6598*0Sstevel@tonic-gate 	} else {
6599*0Sstevel@tonic-gate 		sip = &sfp->fpras_blk1[0];
6600*0Sstevel@tonic-gate 		cip = &cfp->fpras_blk1[0];
6601*0Sstevel@tonic-gate 	}
6602*0Sstevel@tonic-gate 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6603*0Sstevel@tonic-gate 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6604*0Sstevel@tonic-gate 
6605*0Sstevel@tonic-gate 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6606*0Sstevel@tonic-gate 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6607*0Sstevel@tonic-gate 
6608*0Sstevel@tonic-gate 	if (aflt->flt_panic)
6609*0Sstevel@tonic-gate 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6610*0Sstevel@tonic-gate 
6611*0Sstevel@tonic-gate 	/*
6612*0Sstevel@tonic-gate 	 * We get here for copyin/copyout and kcopy or bcopy where the
6613*0Sstevel@tonic-gate 	 * caller has used on_fault.  We will flag the error so that
6614*0Sstevel@tonic-gate 	 * the process may be killed  The trap_async_hwerr mechanism will
6615*0Sstevel@tonic-gate 	 * take appropriate further action (such as a reboot, contract
6616*0Sstevel@tonic-gate 	 * notification etc).  Since we may be continuing we will
6617*0Sstevel@tonic-gate 	 * restore the global hardware copy acceleration switches.
6618*0Sstevel@tonic-gate 	 *
6619*0Sstevel@tonic-gate 	 * When we return from this function to the copy function we want to
6620*0Sstevel@tonic-gate 	 * avoid potentially bad data being used, ie we want the affected
6621*0Sstevel@tonic-gate 	 * copy function to return an error.  The caller should therefore
6622*0Sstevel@tonic-gate 	 * invoke its lofault handler (which always exists for these functions)
6623*0Sstevel@tonic-gate 	 * which will return the appropriate error.
6624*0Sstevel@tonic-gate 	 */
6625*0Sstevel@tonic-gate 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6626*0Sstevel@tonic-gate 	aston(curthread);
6627*0Sstevel@tonic-gate 
6628*0Sstevel@tonic-gate 	use_hw_bcopy = use_hw_bcopy_orig;
6629*0Sstevel@tonic-gate 	use_hw_bzero = use_hw_bzero_orig;
6630*0Sstevel@tonic-gate 	hw_copy_limit_1 = hcl1_orig;
6631*0Sstevel@tonic-gate 	hw_copy_limit_2 = hcl2_orig;
6632*0Sstevel@tonic-gate 	hw_copy_limit_4 = hcl4_orig;
6633*0Sstevel@tonic-gate 	hw_copy_limit_8 = hcl8_orig;
6634*0Sstevel@tonic-gate 
6635*0Sstevel@tonic-gate 	return (1);
6636*0Sstevel@tonic-gate }
6637*0Sstevel@tonic-gate 
6638*0Sstevel@tonic-gate #define	VIS_BLOCKSIZE		64
6639*0Sstevel@tonic-gate 
6640*0Sstevel@tonic-gate int
6641*0Sstevel@tonic-gate dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6642*0Sstevel@tonic-gate {
6643*0Sstevel@tonic-gate 	int ret, watched;
6644*0Sstevel@tonic-gate 
6645*0Sstevel@tonic-gate 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6646*0Sstevel@tonic-gate 	ret = dtrace_blksuword32(addr, data, 0);
6647*0Sstevel@tonic-gate 	if (watched)
6648*0Sstevel@tonic-gate 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6649*0Sstevel@tonic-gate 
6650*0Sstevel@tonic-gate 	return (ret);
6651*0Sstevel@tonic-gate }
6652*0Sstevel@tonic-gate 
6653*0Sstevel@tonic-gate /*
6654*0Sstevel@tonic-gate  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6655*0Sstevel@tonic-gate  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6656*0Sstevel@tonic-gate  * CEEN from the EER to disable traps for further disrupting error types
6657*0Sstevel@tonic-gate  * on that cpu.  We could cross-call instead, but that has a larger
6658*0Sstevel@tonic-gate  * instruction and data footprint than cross-trapping, and the cpu is known
6659*0Sstevel@tonic-gate  * to be faulted.
6660*0Sstevel@tonic-gate  */
6661*0Sstevel@tonic-gate 
6662*0Sstevel@tonic-gate void
6663*0Sstevel@tonic-gate cpu_faulted_enter(struct cpu *cp)
6664*0Sstevel@tonic-gate {
6665*0Sstevel@tonic-gate 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6666*0Sstevel@tonic-gate }
6667*0Sstevel@tonic-gate 
6668*0Sstevel@tonic-gate /*
6669*0Sstevel@tonic-gate  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6670*0Sstevel@tonic-gate  * offline, spare, or online (by the cpu requesting this state change).
6671*0Sstevel@tonic-gate  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6672*0Sstevel@tonic-gate  * disrupting error bits that have accumulated without trapping, then
6673*0Sstevel@tonic-gate  * we cross-trap to re-enable CEEN controlled traps.
6674*0Sstevel@tonic-gate  */
6675*0Sstevel@tonic-gate void
6676*0Sstevel@tonic-gate cpu_faulted_exit(struct cpu *cp)
6677*0Sstevel@tonic-gate {
6678*0Sstevel@tonic-gate 	ch_cpu_errors_t cpu_error_regs;
6679*0Sstevel@tonic-gate 
6680*0Sstevel@tonic-gate 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6681*0Sstevel@tonic-gate 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6682*0Sstevel@tonic-gate 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6683*0Sstevel@tonic-gate 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6684*0Sstevel@tonic-gate 	    (uint64_t)&cpu_error_regs, 0);
6685*0Sstevel@tonic-gate 
6686*0Sstevel@tonic-gate 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6687*0Sstevel@tonic-gate }
6688*0Sstevel@tonic-gate 
6689*0Sstevel@tonic-gate /*
6690*0Sstevel@tonic-gate  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6691*0Sstevel@tonic-gate  * the errors in the original AFSR, 0 otherwise.
6692*0Sstevel@tonic-gate  *
6693*0Sstevel@tonic-gate  * For all procs if the initial error was a BERR or TO, then it is possible
6694*0Sstevel@tonic-gate  * that we may have caused a secondary BERR or TO in the process of logging the
6695*0Sstevel@tonic-gate  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6696*0Sstevel@tonic-gate  * if the request was protected then a panic is still not necessary, if not
6697*0Sstevel@tonic-gate  * protected then aft_panic is already set - so either way there's no need
6698*0Sstevel@tonic-gate  * to set aft_panic for the secondary error.
6699*0Sstevel@tonic-gate  *
6700*0Sstevel@tonic-gate  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6701*0Sstevel@tonic-gate  * a store merge, then the error handling code will call cpu_deferred_error().
6702*0Sstevel@tonic-gate  * When clear_errors() is called, it will determine that secondary errors have
6703*0Sstevel@tonic-gate  * occurred - in particular, the store merge also caused a EDU and WDU that
6704*0Sstevel@tonic-gate  * weren't discovered until this point.
6705*0Sstevel@tonic-gate  *
6706*0Sstevel@tonic-gate  * We do three checks to verify that we are in this case.  If we pass all three
6707*0Sstevel@tonic-gate  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6708*0Sstevel@tonic-gate  * errors occur, we return 0.
6709*0Sstevel@tonic-gate  *
6710*0Sstevel@tonic-gate  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6711*0Sstevel@tonic-gate  * handled in cpu_disrupting_errors().  Since this function is not even called
6712*0Sstevel@tonic-gate  * in the case we are interested in, we just return 0 for these processors.
6713*0Sstevel@tonic-gate  */
6714*0Sstevel@tonic-gate /*ARGSUSED*/
6715*0Sstevel@tonic-gate static int
6716*0Sstevel@tonic-gate cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6717*0Sstevel@tonic-gate     uint64_t t_afar)
6718*0Sstevel@tonic-gate {
6719*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
6720*0Sstevel@tonic-gate #else	/* CHEETAH_PLUS */
6721*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6722*0Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
6723*0Sstevel@tonic-gate 
6724*0Sstevel@tonic-gate 	/*
6725*0Sstevel@tonic-gate 	 * Was the original error a BERR or TO and only a BERR or TO
6726*0Sstevel@tonic-gate 	 * (multiple errors are also OK)
6727*0Sstevel@tonic-gate 	 */
6728*0Sstevel@tonic-gate 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6729*0Sstevel@tonic-gate 		/*
6730*0Sstevel@tonic-gate 		 * Is the new error a BERR or TO and only a BERR or TO
6731*0Sstevel@tonic-gate 		 * (multiple errors are also OK)
6732*0Sstevel@tonic-gate 		 */
6733*0Sstevel@tonic-gate 		if ((ch_flt->afsr_errs &
6734*0Sstevel@tonic-gate 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6735*0Sstevel@tonic-gate 			return (1);
6736*0Sstevel@tonic-gate 	}
6737*0Sstevel@tonic-gate 
6738*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
6739*0Sstevel@tonic-gate 	return (0);
6740*0Sstevel@tonic-gate #else	/* CHEETAH_PLUS */
6741*0Sstevel@tonic-gate 	/*
6742*0Sstevel@tonic-gate 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6743*0Sstevel@tonic-gate 	 *
6744*0Sstevel@tonic-gate 	 * Check the original error was a UE, and only a UE.  Note that
6745*0Sstevel@tonic-gate 	 * the ME bit will cause us to fail this check.
6746*0Sstevel@tonic-gate 	 */
6747*0Sstevel@tonic-gate 	if (t_afsr_errs != C_AFSR_UE)
6748*0Sstevel@tonic-gate 		return (0);
6749*0Sstevel@tonic-gate 
6750*0Sstevel@tonic-gate 	/*
6751*0Sstevel@tonic-gate 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6752*0Sstevel@tonic-gate 	 */
6753*0Sstevel@tonic-gate 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6754*0Sstevel@tonic-gate 		return (0);
6755*0Sstevel@tonic-gate 
6756*0Sstevel@tonic-gate 	/*
6757*0Sstevel@tonic-gate 	 * Check the AFAR of the original error and secondary errors
6758*0Sstevel@tonic-gate 	 * match to the 64-byte boundary
6759*0Sstevel@tonic-gate 	 */
6760*0Sstevel@tonic-gate 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6761*0Sstevel@tonic-gate 		return (0);
6762*0Sstevel@tonic-gate 
6763*0Sstevel@tonic-gate 	/*
6764*0Sstevel@tonic-gate 	 * We've passed all the checks, so it's a secondary error!
6765*0Sstevel@tonic-gate 	 */
6766*0Sstevel@tonic-gate 	return (1);
6767*0Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
6768*0Sstevel@tonic-gate }
6769*0Sstevel@tonic-gate 
6770*0Sstevel@tonic-gate /*
6771*0Sstevel@tonic-gate  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6772*0Sstevel@tonic-gate  * is checked for any valid errors.  If found, the error type is
6773*0Sstevel@tonic-gate  * returned. If not found, the flt_type is checked for L1$ parity errors.
6774*0Sstevel@tonic-gate  */
6775*0Sstevel@tonic-gate /*ARGSUSED*/
6776*0Sstevel@tonic-gate static uint8_t
6777*0Sstevel@tonic-gate cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6778*0Sstevel@tonic-gate {
6779*0Sstevel@tonic-gate #if defined(JALAPENO)
6780*0Sstevel@tonic-gate 	/*
6781*0Sstevel@tonic-gate 	 * Currently, logging errors to the SC is not supported on Jalapeno
6782*0Sstevel@tonic-gate 	 */
6783*0Sstevel@tonic-gate 	return (PLAT_ECC_ERROR2_NONE);
6784*0Sstevel@tonic-gate #else
6785*0Sstevel@tonic-gate 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6786*0Sstevel@tonic-gate 
6787*0Sstevel@tonic-gate 	switch (ch_flt->flt_bit) {
6788*0Sstevel@tonic-gate 	case C_AFSR_CE:
6789*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_CE);
6790*0Sstevel@tonic-gate 	case C_AFSR_UCC:
6791*0Sstevel@tonic-gate 	case C_AFSR_EDC:
6792*0Sstevel@tonic-gate 	case C_AFSR_WDC:
6793*0Sstevel@tonic-gate 	case C_AFSR_CPC:
6794*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L2_CE);
6795*0Sstevel@tonic-gate 	case C_AFSR_EMC:
6796*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_EMC);
6797*0Sstevel@tonic-gate 	case C_AFSR_IVC:
6798*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_IVC);
6799*0Sstevel@tonic-gate 	case C_AFSR_UE:
6800*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_UE);
6801*0Sstevel@tonic-gate 	case C_AFSR_UCU:
6802*0Sstevel@tonic-gate 	case C_AFSR_EDU:
6803*0Sstevel@tonic-gate 	case C_AFSR_WDU:
6804*0Sstevel@tonic-gate 	case C_AFSR_CPU:
6805*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L2_UE);
6806*0Sstevel@tonic-gate 	case C_AFSR_IVU:
6807*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_IVU);
6808*0Sstevel@tonic-gate 	case C_AFSR_TO:
6809*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_TO);
6810*0Sstevel@tonic-gate 	case C_AFSR_BERR:
6811*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_BERR);
6812*0Sstevel@tonic-gate #if defined(CHEETAH_PLUS)
6813*0Sstevel@tonic-gate 	case C_AFSR_L3_EDC:
6814*0Sstevel@tonic-gate 	case C_AFSR_L3_UCC:
6815*0Sstevel@tonic-gate 	case C_AFSR_L3_CPC:
6816*0Sstevel@tonic-gate 	case C_AFSR_L3_WDC:
6817*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L3_CE);
6818*0Sstevel@tonic-gate 	case C_AFSR_IMC:
6819*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_IMC);
6820*0Sstevel@tonic-gate 	case C_AFSR_TSCE:
6821*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L2_TSCE);
6822*0Sstevel@tonic-gate 	case C_AFSR_THCE:
6823*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L2_THCE);
6824*0Sstevel@tonic-gate 	case C_AFSR_L3_MECC:
6825*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L3_MECC);
6826*0Sstevel@tonic-gate 	case C_AFSR_L3_THCE:
6827*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L3_THCE);
6828*0Sstevel@tonic-gate 	case C_AFSR_L3_CPU:
6829*0Sstevel@tonic-gate 	case C_AFSR_L3_EDU:
6830*0Sstevel@tonic-gate 	case C_AFSR_L3_UCU:
6831*0Sstevel@tonic-gate 	case C_AFSR_L3_WDU:
6832*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_L3_UE);
6833*0Sstevel@tonic-gate 	case C_AFSR_DUE:
6834*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_DUE);
6835*0Sstevel@tonic-gate 	case C_AFSR_DTO:
6836*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_DTO);
6837*0Sstevel@tonic-gate 	case C_AFSR_DBERR:
6838*0Sstevel@tonic-gate 		return (PLAT_ECC_ERROR2_DBERR);
6839*0Sstevel@tonic-gate #endif	/* CHEETAH_PLUS */
6840*0Sstevel@tonic-gate 	default:
6841*0Sstevel@tonic-gate 		switch (ch_flt->flt_type) {
6842*0Sstevel@tonic-gate #if defined(CPU_IMP_L1_CACHE_PARITY)
6843*0Sstevel@tonic-gate 		case CPU_IC_PARITY:
6844*0Sstevel@tonic-gate 			return (PLAT_ECC_ERROR2_IPE);
6845*0Sstevel@tonic-gate 		case CPU_DC_PARITY:
6846*0Sstevel@tonic-gate 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6847*0Sstevel@tonic-gate 				if (ch_flt->parity_data.dpe.cpl_cache ==
6848*0Sstevel@tonic-gate 				    CPU_PC_PARITY) {
6849*0Sstevel@tonic-gate 					return (PLAT_ECC_ERROR2_PCACHE);
6850*0Sstevel@tonic-gate 				}
6851*0Sstevel@tonic-gate 			}
6852*0Sstevel@tonic-gate 			return (PLAT_ECC_ERROR2_DPE);
6853*0Sstevel@tonic-gate #endif /* CPU_IMP_L1_CACHE_PARITY */
6854*0Sstevel@tonic-gate 		case CPU_ITLB_PARITY:
6855*0Sstevel@tonic-gate 			return (PLAT_ECC_ERROR2_ITLB);
6856*0Sstevel@tonic-gate 		case CPU_DTLB_PARITY:
6857*0Sstevel@tonic-gate 			return (PLAT_ECC_ERROR2_DTLB);
6858*0Sstevel@tonic-gate 		default:
6859*0Sstevel@tonic-gate 			return (PLAT_ECC_ERROR2_NONE);
6860*0Sstevel@tonic-gate 		}
6861*0Sstevel@tonic-gate 	}
6862*0Sstevel@tonic-gate #endif	/* JALAPENO */
6863*0Sstevel@tonic-gate }
6864