/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

uchar_t	*ctx_pgsz_array = NULL;

/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;
	uint64_t ec_afsr;
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
				uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
				uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
		int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
		uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

extern uint_t read_all_memscrub;
extern void memscrub_run(void);

static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *      00-63   The number of the bad bit, when only one bit is bad.
 *      64      ECC bit C0 is bad.
 *      65      ECC bit C1 is bad.
 *      66      ECC bit C2 is bad.
 *      67      ECC bit C3 is bad.
 *      68      ECC bit C4 is bad.
 *      69      ECC bit C5 is bad.
 *      70      ECC bit C6 is bad.
 *      71      ECC bit C7 is bad.
 *      72      Two bits are bad.
 *      73      Three bits are bad.
 *      74      Four bits are bad.
 *      75      More than four bits are bad.
 *      76      No bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						    (synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						    (synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE 256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)
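
/*
 * Illustrative sketch only (hypothetical helper, guarded out of the
 * build): decoding a composite flt_synd value with the macros above.
 * Bit 15 selects the UDB half; the low 8 bits of the syndrome index
 * ecc_syndrome_tab to classify the error.
 */
#ifdef SPITFIRE_EXAMPLES
static int
example_classify_synd(ushort_t flt_synd, int *is_udbl)
{
	int synd_code;

	*is_udbl = UDBL(flt_synd);	/* 1 = UDBL half, 0 = UDBH half */
	synd_code = ecc_syndrome_tab[SYND(flt_synd) & 0xFF];

	if (SYND_IS_SINGLE_BIT_DATA(synd_code))
		return (synd_code);	/* number of the one bad data bit */
	if (SYND_IS_SINGLE_BIT_CHK(synd_code))
		return (synd_code);	/* one bad ECC check bit, C0-C7 */
	return (-1);			/* multi-bit (M2-MX) or none (NA) */
}
#endif	/* SPITFIRE_EXAMPLES */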

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 * If one bit on, give it a 95.
 * If seven bits on, give it a 10.
 * If two bits on:
 *   in different nybbles, a 90
 *   in same nybble, but unaligned, 85
 *   in same nybble and as an aligned pair, 80
 * If six bits on, look at the bits that are off:
 *   in same nybble and as an aligned pair, 15
 *   in same nybble, but unaligned, 20
 *   in different nybbles, a 25
 * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
 *   in the same nybble, 65
 * If five bits on, look at the bits that are off:
 *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
 * If four bits on:
 *   all in one nybble, 45
 *   as two aligned pairs, 50
 *   one aligned pair, 55
 *   no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}
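
/*
 * Worked example (illustrative): a P_SYND of 0x0101 has exactly one bit
 * set in each byte, so p_synd_score_table[] yields 95 for both bytes and
 * ecc_psynd_score() returns 95 -- strong evidence against that module.
 * By contrast, 0x00FF scores 5 for each byte, since all-bits-off and
 * all-bits-on are the weakest evidence in the table.
 */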

/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *          This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *          else except CE errors) with a priority of 1 (highest).  This tag
 *          is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *          of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
 */

#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* print flt_id */
#define	CPU_TL			0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* print flt_pc */
#define	CPU_SYND		0x1000	/* print flt_synd and unum */

#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"

/*
 * Maximum number of contexts for Spitfire.
 */
#define	MAX_NCTXS	(1 << 13)
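
/*
 * Informational note: 1 << 13 is 8192 contexts, matching the 13-bit
 * MMU context field implemented by Spitfire.
 */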

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t	cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 */
int	itlb_entries;
int	dtlb_entries;

void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	extern int disable_text_largepages;
	extern int disable_initdata_largepages;

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages.
	 */
	page_retire_messages = 1;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	/*
	 * Use the maximum number of contexts available for Spitfire unless
	 * it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It cannot be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Allow only 8K, 64K and 4M pages for text by default.
	 * Allow only 8K and 64K pages for initialized data segments by
	 * default.
	 */
	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
	    (1 << TTE256M);
	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
	    (1 << TTE32M) | (1 << TTE256M);
}

static int
getintprop(dnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;

	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 */
void
cpu_fiximp(dnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity", &ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
		};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size. The magic number
	 * VIS_COPY_THRESHOLD comes from the copyin/copyout code,
	 * which requires a floor of VIS_COPY_THRESHOLD bytes
	 * before it will use VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache. We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}
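
/*
 * Reading the table above (informational, per our understanding of the
 * copyin/copyout code): hw_copy_limit_N is the byte count at or above
 * which a copy whose operands share N-byte alignment is handed to the
 * VIS block-copy path.  On a 1 MB ecache, for example, an 8-byte-aligned
 * copy of 1536 bytes or more would take the VIS path.
 */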

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency. Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

void
send_mondo_set(cpuset_t set)
{
	int i;

	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			send_one_mondo(i);
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
}

void
syncfpu(void)
{
}

/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}


/*
 * Correctable ecc error trap handler
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bit not set in the AFSR",
			"  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used in places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			page_t *pp = page_numtopp_nolock((pfn_t)
			    (ecc->flt_addr >> MMU_PAGESHIFT));

			if (pp != NULL && page_isretired(pp)) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bits not set in the UDBs",
			" Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

char *
flt_to_error_type(struct async_flt *aflt)
{
	if (aflt->flt_status & ECC_INTERMITTENT)
		return (ERR_TYPE_DESC_INTERMITTENT);
	if (aflt->flt_status & ECC_PERSISTENT)
		return (ERR_TYPE_DESC_PERSISTENT);
	if (aflt->flt_status & ECC_STICKY)
		return (ERR_TYPE_DESC_STICKY);
	return (ERR_TYPE_DESC_UNKNOWN);
}

/*
 * Called by correctable ecc error logging code to print out
 * the sticky/persistent/intermittent status of the error.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	ushort_t status;
	char *status1_str = "Memory";
	char *status2_str = "Intermittent";
	struct async_flt *aflt = (struct async_flt *)spf_flt;

	status = aflt->flt_status;

	if (status & ECC_ECACHE)
		status1_str = "Ecache";

	if (status & ECC_STICKY)
		status2_str = "Sticky";
	else if (status & ECC_PERSISTENT)
		status2_str = "Persistent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
		NULL, " Corrected %s Error on %s is %s",
		status1_str, unum, status2_str);
}

/*
 * check for a valid CE syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent. Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address. To deal with this, we disable
	 * NCEEN while we scrub that address. If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * Check whether the error detected by I/O is persistent,
	 * sticky or intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}
1041*0Sstevel@tonic-gate 
1042*0Sstevel@tonic-gate 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1043*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_size);
1044*0Sstevel@tonic-gate 
1045*0Sstevel@tonic-gate 	get_asyncflt(&afsr);
1046*0Sstevel@tonic-gate 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1047*0Sstevel@tonic-gate 		/*
1048*0Sstevel@tonic-gate 		 * Must ensure that we don't get the TIMEOUT/BERR
1049*0Sstevel@tonic-gate 		 * when we reenable NCEEN, so we clear the AFSR.
1050*0Sstevel@tonic-gate 		 */
1051*0Sstevel@tonic-gate 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1052*0Sstevel@tonic-gate 		if (eer & (EER_CEEN | EER_NCEEN))
1053*0Sstevel@tonic-gate 		    set_error_enable(eer);
1054*0Sstevel@tonic-gate 		kpreempt_enable();
1055*0Sstevel@tonic-gate 		return;
1056*0Sstevel@tonic-gate 	}
1057*0Sstevel@tonic-gate 
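	/*
	 * The address proved valid (no TIMEOUT/BERR), so re-enable
	 * NCEEN for deferred errors but keep CEEN off until the
	 * persistent/sticky/intermittent check below is complete.
	 */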
1058*0Sstevel@tonic-gate 	if (eer & EER_NCEEN)
1059*0Sstevel@tonic-gate 	    set_error_enable(eer & ~EER_CEEN);
1060*0Sstevel@tonic-gate 
1061*0Sstevel@tonic-gate 	/*
1062*0Sstevel@tonic-gate 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1063*0Sstevel@tonic-gate 	 * not trip over the error, mark it intermittent.  If the scrub did
1064*0Sstevel@tonic-gate 	 * trip the error again and it did not scrub away, mark it sticky.
1065*0Sstevel@tonic-gate 	 * Otherwise mark it persistent.
1066*0Sstevel@tonic-gate 	 */
1067*0Sstevel@tonic-gate 	if (check_ecc(ecc) != 0) {
1068*0Sstevel@tonic-gate 		cpu_read_paddr(ecc, 0, 1);
1069*0Sstevel@tonic-gate 
1070*0Sstevel@tonic-gate 		if (check_ecc(ecc) != 0)
1071*0Sstevel@tonic-gate 			status = ECC_STICKY;
1072*0Sstevel@tonic-gate 		else
1073*0Sstevel@tonic-gate 			status = ECC_PERSISTENT;
1074*0Sstevel@tonic-gate 	} else
1075*0Sstevel@tonic-gate 		status = ECC_INTERMITTENT;
1076*0Sstevel@tonic-gate 
1077*0Sstevel@tonic-gate 	if (eer & (EER_CEEN | EER_NCEEN))
1078*0Sstevel@tonic-gate 	    set_error_enable(eer);
1079*0Sstevel@tonic-gate 	kpreempt_enable();
1080*0Sstevel@tonic-gate 
1081*0Sstevel@tonic-gate 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1082*0Sstevel@tonic-gate 	ecc->flt_status |= status;
1083*0Sstevel@tonic-gate }
1084*0Sstevel@tonic-gate 
1085*0Sstevel@tonic-gate /*
1086*0Sstevel@tonic-gate  * Get the syndrome and unum, then call the routines
1087*0Sstevel@tonic-gate  * to check the other CPUs and I/O buses, and then do the error logging.
1088*0Sstevel@tonic-gate  */
1089*0Sstevel@tonic-gate /*ARGSUSED1*/
1090*0Sstevel@tonic-gate void
1091*0Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1092*0Sstevel@tonic-gate {
1093*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
1094*0Sstevel@tonic-gate 	int len = 0;
1095*0Sstevel@tonic-gate 	int ce_verbose = 0;
1096*0Sstevel@tonic-gate 
1097*0Sstevel@tonic-gate 	ASSERT(ecc->flt_func != NULL);
1098*0Sstevel@tonic-gate 
1099*0Sstevel@tonic-gate 	/* Get the unum string for logging purposes */
1100*0Sstevel@tonic-gate 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1101*0Sstevel@tonic-gate 	    UNUM_NAMLEN, &len);
1102*0Sstevel@tonic-gate 
1103*0Sstevel@tonic-gate 	/* Call specific error logging routine */
1104*0Sstevel@tonic-gate 	(void) (*ecc->flt_func)(ecc, unum);
1105*0Sstevel@tonic-gate 
1106*0Sstevel@tonic-gate 	/*
1107*0Sstevel@tonic-gate 	 * Count errors per unum.
1108*0Sstevel@tonic-gate 	 * Non-memory errors are all counted via a special unum string.
1109*0Sstevel@tonic-gate 	 */
1110*0Sstevel@tonic-gate 	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
1111*0Sstevel@tonic-gate 	    automatic_page_removal) {
1112*0Sstevel@tonic-gate 		page_t *pp = page_numtopp_nolock((pfn_t)
1113*0Sstevel@tonic-gate 		    (ecc->flt_addr >> MMU_PAGESHIFT));
1114*0Sstevel@tonic-gate 
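		/*
		 * ce_count_unum() says the page is failing: mark it
		 * faulty and retire it to take it out of service.
		 */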
1115*0Sstevel@tonic-gate 		if (pp) {
1116*0Sstevel@tonic-gate 			page_settoxic(pp, PAGE_IS_FAULTY);
1117*0Sstevel@tonic-gate 			(void) page_retire(pp, PAGE_IS_FAILING);
1118*0Sstevel@tonic-gate 		}
1119*0Sstevel@tonic-gate 	}
1120*0Sstevel@tonic-gate 
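	/*
	 * Decide whether to log verbosely: always when panicking and for
	 * non-memory errors; for bus faults and memory CEs, honor the
	 * ce_verbose_memory tuneable.
	 */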
1121*0Sstevel@tonic-gate 	if (ecc->flt_panic) {
1122*0Sstevel@tonic-gate 		ce_verbose = 1;
1123*0Sstevel@tonic-gate 	} else if ((ecc->flt_class == BUS_FAULT) ||
1124*0Sstevel@tonic-gate 	    (ecc->flt_stat & P_AFSR_CE)) {
1125*0Sstevel@tonic-gate 		ce_verbose = (ce_verbose_memory > 0);
1126*0Sstevel@tonic-gate 	} else {
1127*0Sstevel@tonic-gate 		ce_verbose = 1;
1128*0Sstevel@tonic-gate 	}
1129*0Sstevel@tonic-gate 
1130*0Sstevel@tonic-gate 	if (ce_verbose) {
1131*0Sstevel@tonic-gate 		spitf_async_flt sflt;
1132*0Sstevel@tonic-gate 		int synd_code;
1133*0Sstevel@tonic-gate 
1134*0Sstevel@tonic-gate 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1135*0Sstevel@tonic-gate 
1136*0Sstevel@tonic-gate 		cpu_ce_log_status(&sflt, unum);
1137*0Sstevel@tonic-gate 
1138*0Sstevel@tonic-gate 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1139*0Sstevel@tonic-gate 				SYND(ecc->flt_synd));
1140*0Sstevel@tonic-gate 
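		/*
		 * synd_code identifies either a single bad data bit, a
		 * single bad check bit (offset by C0), or one of the
		 * multi-bit codes (M2/M3/M4/MX) handled below.
		 */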
1141*0Sstevel@tonic-gate 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1142*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1143*0Sstevel@tonic-gate 			    NULL, " ECC Data Bit %2d was in error "
1144*0Sstevel@tonic-gate 			    "and corrected", synd_code);
1145*0Sstevel@tonic-gate 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1146*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1147*0Sstevel@tonic-gate 			    NULL, " ECC Check Bit %2d was in error "
1148*0Sstevel@tonic-gate 			    "and corrected", synd_code - C0);
1149*0Sstevel@tonic-gate 		} else {
1150*0Sstevel@tonic-gate 			/*
1151*0Sstevel@tonic-gate 			 * These are UE errors - we shouldn't be getting CE
1152*0Sstevel@tonic-gate 			 * traps for these; handle them in case of bad h/w.
1153*0Sstevel@tonic-gate 			 */
1154*0Sstevel@tonic-gate 			switch (synd_code) {
1155*0Sstevel@tonic-gate 			case M2:
1156*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, &sflt,
1157*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
1158*0Sstevel@tonic-gate 				    " Two ECC Bits were in error");
1159*0Sstevel@tonic-gate 				break;
1160*0Sstevel@tonic-gate 			case M3:
1161*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, &sflt,
1162*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
1163*0Sstevel@tonic-gate 				    " Three ECC Bits were in error");
1164*0Sstevel@tonic-gate 				break;
1165*0Sstevel@tonic-gate 			case M4:
1166*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, &sflt,
1167*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
1168*0Sstevel@tonic-gate 				    " Four ECC Bits were in error");
1169*0Sstevel@tonic-gate 				break;
1170*0Sstevel@tonic-gate 			case MX:
1171*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, &sflt,
1172*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
1173*0Sstevel@tonic-gate 				    " More than Four ECC bits were "
1174*0Sstevel@tonic-gate 				    "in error");
1175*0Sstevel@tonic-gate 				break;
1176*0Sstevel@tonic-gate 			default:
1177*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, &sflt,
1178*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
1179*0Sstevel@tonic-gate 				    " Unknown fault syndrome %d",
1180*0Sstevel@tonic-gate 				    synd_code);
1181*0Sstevel@tonic-gate 				break;
1182*0Sstevel@tonic-gate 			}
1183*0Sstevel@tonic-gate 		}
1184*0Sstevel@tonic-gate 	}
1185*0Sstevel@tonic-gate 
1186*0Sstevel@tonic-gate 	/* Display entire cache line, if valid address */
1187*0Sstevel@tonic-gate 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1188*0Sstevel@tonic-gate 		read_ecc_data(ecc, 1, 1);
1189*0Sstevel@tonic-gate }
1190*0Sstevel@tonic-gate 
1191*0Sstevel@tonic-gate /*
1192*0Sstevel@tonic-gate  * We route all errors through a single switch statement.
1193*0Sstevel@tonic-gate  */
1194*0Sstevel@tonic-gate void
1195*0Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
1196*0Sstevel@tonic-gate {
1197*0Sstevel@tonic-gate 
1198*0Sstevel@tonic-gate 	switch (aflt->flt_class) {
1199*0Sstevel@tonic-gate 	case CPU_FAULT:
1200*0Sstevel@tonic-gate 		cpu_async_log_err(aflt);
1201*0Sstevel@tonic-gate 		break;
1202*0Sstevel@tonic-gate 
1203*0Sstevel@tonic-gate 	case BUS_FAULT:
1204*0Sstevel@tonic-gate 		bus_async_log_err(aflt);
1205*0Sstevel@tonic-gate 		break;
1206*0Sstevel@tonic-gate 
1207*0Sstevel@tonic-gate 	default:
1208*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1209*0Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1210*0Sstevel@tonic-gate 		break;
1211*0Sstevel@tonic-gate 	}
1212*0Sstevel@tonic-gate }
1213*0Sstevel@tonic-gate 
1214*0Sstevel@tonic-gate /* Values for action variable in cpu_async_error() */
1215*0Sstevel@tonic-gate #define	ACTION_NONE		0
1216*0Sstevel@tonic-gate #define	ACTION_TRAMPOLINE	1
1217*0Sstevel@tonic-gate #define	ACTION_AST_FLAGS	2
1218*0Sstevel@tonic-gate 
1219*0Sstevel@tonic-gate /*
1220*0Sstevel@tonic-gate  * Access error trap handler for asynchronous cpu errors.  This routine is
1221*0Sstevel@tonic-gate  * called to handle a data or instruction access error.  All fatal errors are
1222*0Sstevel@tonic-gate  * completely handled by this routine (by panicking).  Non-fatal error logging
1223*0Sstevel@tonic-gate  * is queued for later processing either via AST or softint at a lower PIL.
1224*0Sstevel@tonic-gate  * In case of panic, the error log queue will also be processed as part of the
1225*0Sstevel@tonic-gate  * panic flow to ensure all errors are logged.  This routine is called with all
1226*0Sstevel@tonic-gate  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1227*0Sstevel@tonic-gate  * error bits are also cleared.  The hardware has also disabled the I and
1228*0Sstevel@tonic-gate  * D-caches for us, so we must re-enable them before returning.
1229*0Sstevel@tonic-gate  *
1230*0Sstevel@tonic-gate  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1231*0Sstevel@tonic-gate  *
1232*0Sstevel@tonic-gate  *		_______________________________________________________________
1233*0Sstevel@tonic-gate  *		|        Privileged tl0		|         Unprivileged	      |
1234*0Sstevel@tonic-gate  *		| Protected	| Unprotected	| Protected	| Unprotected |
1235*0Sstevel@tonic-gate  *		|on_trap|lofault|		|		|	      |
1236*0Sstevel@tonic-gate  * -------------|-------|-------+---------------+---------------+-------------|
1237*0Sstevel@tonic-gate  *		|	|	|		|		|	      |
1238*0Sstevel@tonic-gate  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1239*0Sstevel@tonic-gate  *		|	|	|		|		|	      |
1240*0Sstevel@tonic-gate  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1241*0Sstevel@tonic-gate  *		|	|	|		|		|	      |
1242*0Sstevel@tonic-gate  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1243*0Sstevel@tonic-gate  *		|	|	|		|		|	      |
1244*0Sstevel@tonic-gate  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1245*0Sstevel@tonic-gate  * ____________________________________________________________________________
1246*0Sstevel@tonic-gate  *
1247*0Sstevel@tonic-gate  *
1248*0Sstevel@tonic-gate  * Action codes:
1249*0Sstevel@tonic-gate  *
1250*0Sstevel@tonic-gate  * L - log
1251*0Sstevel@tonic-gate  * M - kick off memscrubber if flt_in_memory
1252*0Sstevel@tonic-gate  * P - panic
1253*0Sstevel@tonic-gate  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1254*0Sstevel@tonic-gate  * R - i)  if aft_panic is set, panic
1255*0Sstevel@tonic-gate  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1256*0Sstevel@tonic-gate  * S - send SIGBUS to process
1257*0Sstevel@tonic-gate  * T - trampoline
1258*0Sstevel@tonic-gate  *
1259*0Sstevel@tonic-gate  * Special cases:
1260*0Sstevel@tonic-gate  *
1261*0Sstevel@tonic-gate  * 1) if aft_testfatal is set, all faults result in a panic regardless
1262*0Sstevel@tonic-gate  *    of type (even WP), protection (even on_trap), or privilege.
1263*0Sstevel@tonic-gate  */
1264*0Sstevel@tonic-gate /*ARGSUSED*/
1265*0Sstevel@tonic-gate void
1266*0Sstevel@tonic-gate cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1267*0Sstevel@tonic-gate 	uint_t p_afsr_high, uint_t p_afar_high)
1268*0Sstevel@tonic-gate {
1269*0Sstevel@tonic-gate 	ushort_t sdbh, sdbl, ttype, tl;
1270*0Sstevel@tonic-gate 	spitf_async_flt spf_flt;
1271*0Sstevel@tonic-gate 	struct async_flt *aflt;
1272*0Sstevel@tonic-gate 	char pr_reason[28];
1273*0Sstevel@tonic-gate 	uint64_t oafsr;
1274*0Sstevel@tonic-gate 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1275*0Sstevel@tonic-gate 	int action = ACTION_NONE;
1276*0Sstevel@tonic-gate 	uint64_t t_afar = p_afar;
1277*0Sstevel@tonic-gate 	uint64_t t_afsr = p_afsr;
1278*0Sstevel@tonic-gate 	int expected = DDI_FM_ERR_UNEXPECTED;
1279*0Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
1280*0Sstevel@tonic-gate 
1281*0Sstevel@tonic-gate 	/*
1282*0Sstevel@tonic-gate 	 * We need to look at p_flag to determine if the thread detected an
1283*0Sstevel@tonic-gate 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1284*0Sstevel@tonic-gate 	 * because we just need a consistent snapshot and we know that everyone
1285*0Sstevel@tonic-gate 	 * else will store a consistent set of bits while holding p_lock.  We
1286*0Sstevel@tonic-gate 	 * don't have to worry about a race because SDOCORE is set once prior
1287*0Sstevel@tonic-gate 	 * to doing i/o from the process's address space and is never cleared.
1288*0Sstevel@tonic-gate 	 */
1289*0Sstevel@tonic-gate 	uint_t pflag = ttoproc(curthread)->p_flag;
1290*0Sstevel@tonic-gate 
1291*0Sstevel@tonic-gate 	pr_reason[0] = '\0';
1292*0Sstevel@tonic-gate 
1293*0Sstevel@tonic-gate 	/*
1294*0Sstevel@tonic-gate 	 * Note: the Spitfire data buffer error registers
1295*0Sstevel@tonic-gate 	 * (upper and lower halves) are or'ed into the upper
1296*0Sstevel@tonic-gate 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1297*0Sstevel@tonic-gate 	 */
1298*0Sstevel@tonic-gate 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1299*0Sstevel@tonic-gate 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1300*0Sstevel@tonic-gate 
1301*0Sstevel@tonic-gate 	/*
1302*0Sstevel@tonic-gate 	 * Grab the ttype (bits <61:53>) and trap level (bits <63:62>)
1303*0Sstevel@tonic-gate 	 * encoded in the saved afsr passed from async_err()
1304*0Sstevel@tonic-gate 	 */
1305*0Sstevel@tonic-gate 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1306*0Sstevel@tonic-gate 	tl = (ushort_t)(t_afsr >> 62);
1307*0Sstevel@tonic-gate 
1308*0Sstevel@tonic-gate 	t_afsr &= S_AFSR_MASK;
1309*0Sstevel@tonic-gate 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1310*0Sstevel@tonic-gate 
1311*0Sstevel@tonic-gate 	/*
1312*0Sstevel@tonic-gate 	 * Initialize most of the common and CPU-specific structure.  We derive
1313*0Sstevel@tonic-gate 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1314*0Sstevel@tonic-gate 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1315*0Sstevel@tonic-gate 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1316*0Sstevel@tonic-gate 	 * tuneable aft_testfatal is set (not the default).
1317*0Sstevel@tonic-gate 	 */
1318*0Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
1319*0Sstevel@tonic-gate 	aflt = (struct async_flt *)&spf_flt;
1320*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
1321*0Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
1322*0Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
1323*0Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
1324*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
1325*0Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
1326*0Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
1327*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
1328*0Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1329*0Sstevel@tonic-gate 	aflt->flt_tl = (uchar_t)tl;
1330*0Sstevel@tonic-gate 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1331*0Sstevel@tonic-gate 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1332*0Sstevel@tonic-gate 
1333*0Sstevel@tonic-gate 	/*
1334*0Sstevel@tonic-gate 	 * Set flt_status based on the trap type.  If we end up here as the
1335*0Sstevel@tonic-gate 	 * result of a UE detected by the CE handling code, leave status 0.
1336*0Sstevel@tonic-gate 	 */
1337*0Sstevel@tonic-gate 	switch (ttype) {
1338*0Sstevel@tonic-gate 	case T_DATA_ERROR:
1339*0Sstevel@tonic-gate 		aflt->flt_status = ECC_D_TRAP;
1340*0Sstevel@tonic-gate 		break;
1341*0Sstevel@tonic-gate 	case T_INSTR_ERROR:
1342*0Sstevel@tonic-gate 		aflt->flt_status = ECC_I_TRAP;
1343*0Sstevel@tonic-gate 		break;
1344*0Sstevel@tonic-gate 	}
1345*0Sstevel@tonic-gate 
1346*0Sstevel@tonic-gate 	spf_flt.flt_sdbh = sdbh;
1347*0Sstevel@tonic-gate 	spf_flt.flt_sdbl = sdbl;
1348*0Sstevel@tonic-gate 
1349*0Sstevel@tonic-gate 	/*
1350*0Sstevel@tonic-gate 	 * Check for fatal async errors.
1351*0Sstevel@tonic-gate 	 */
1352*0Sstevel@tonic-gate 	check_misc_err(&spf_flt);
1353*0Sstevel@tonic-gate 
1354*0Sstevel@tonic-gate 	/*
1355*0Sstevel@tonic-gate 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1356*0Sstevel@tonic-gate 	 * see if we were executing in the kernel under on_trap() or t_lofault
1357*0Sstevel@tonic-gate 	 * protection.  If so, modify the saved registers so that we return
1358*0Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine.
1359*0Sstevel@tonic-gate 	 */
1360*0Sstevel@tonic-gate 	if (aflt->flt_priv && tl == 0) {
1361*0Sstevel@tonic-gate 		if (curthread->t_ontrap != NULL) {
1362*0Sstevel@tonic-gate 			on_trap_data_t *otp = curthread->t_ontrap;
1363*0Sstevel@tonic-gate 
1364*0Sstevel@tonic-gate 			if (otp->ot_prot & OT_DATA_EC) {
1365*0Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_EC;
1366*0Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_EC;
1367*0Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
1368*0Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
1369*0Sstevel@tonic-gate 				action = ACTION_TRAMPOLINE;
1370*0Sstevel@tonic-gate 			}
1371*0Sstevel@tonic-gate 
1372*0Sstevel@tonic-gate 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1373*0Sstevel@tonic-gate 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1374*0Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_ACCESS;
1375*0Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_ACCESS;
1376*0Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
1377*0Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
1378*0Sstevel@tonic-gate 				action = ACTION_TRAMPOLINE;
1379*0Sstevel@tonic-gate 				/*
1380*0Sstevel@tonic-gate 				 * for peeks and caut_gets errors are expected
1381*0Sstevel@tonic-gate 				 */
1382*0Sstevel@tonic-gate 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1383*0Sstevel@tonic-gate 				if (!hp)
1384*0Sstevel@tonic-gate 					expected = DDI_FM_ERR_PEEK;
1385*0Sstevel@tonic-gate 				else if (hp->ah_acc.devacc_attr_access ==
1386*0Sstevel@tonic-gate 				    DDI_CAUTIOUS_ACC)
1387*0Sstevel@tonic-gate 					expected = DDI_FM_ERR_EXPECTED;
1388*0Sstevel@tonic-gate 			}
1389*0Sstevel@tonic-gate 
1390*0Sstevel@tonic-gate 		} else if (curthread->t_lofault) {
1391*0Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_COPY;
1392*0Sstevel@tonic-gate 			rp->r_g1 = EFAULT;
1393*0Sstevel@tonic-gate 			rp->r_pc = curthread->t_lofault;
1394*0Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
1395*0Sstevel@tonic-gate 			action = ACTION_TRAMPOLINE;
1396*0Sstevel@tonic-gate 		}
1397*0Sstevel@tonic-gate 	}
1398*0Sstevel@tonic-gate 
1399*0Sstevel@tonic-gate 	/*
1400*0Sstevel@tonic-gate 	 * Determine if this error needs to be treated as fatal.  Note that
1401*0Sstevel@tonic-gate 	 * multiple errors detected upon entry to this trap handler does not
1402*0Sstevel@tonic-gate 	 * necessarily warrant a panic.  We only want to panic if the trap
1403*0Sstevel@tonic-gate 	 * happened in privileged mode and not under t_ontrap or t_lofault
1404*0Sstevel@tonic-gate 	 * protection.  The exception is WP: if we *only* get WP, it is not
1405*0Sstevel@tonic-gate 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1406*0Sstevel@tonic-gate 	 *
1407*0Sstevel@tonic-gate 	 * aft_panic, if set, effectively makes us treat usermode
1408*0Sstevel@tonic-gate 	 * UE/EDP/LDP faults as if they were privileged - so we will
1409*0Sstevel@tonic-gate 	 * panic instead of sending a contract event.  A lofault-protected
1410*0Sstevel@tonic-gate 	 * fault will normally follow the contract event; if aft_panic is
1411*0Sstevel@tonic-gate 	 * set this will be changed to a panic.
1412*0Sstevel@tonic-gate 	 *
1413*0Sstevel@tonic-gate 	 * For usermode BERR/BTO errors, eg from processes performing device
1414*0Sstevel@tonic-gate 	 * control through mapped device memory, we need only deliver
1415*0Sstevel@tonic-gate 	 * a SIGBUS to the offending process.
1416*0Sstevel@tonic-gate 	 *
1417*0Sstevel@tonic-gate 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1418*0Sstevel@tonic-gate 	 * checked later; for now we implement the common reasons.
1419*0Sstevel@tonic-gate 	 */
1420*0Sstevel@tonic-gate 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1421*0Sstevel@tonic-gate 		/*
1422*0Sstevel@tonic-gate 		 * Beware - multiple bits may be set in AFSR
1423*0Sstevel@tonic-gate 		 */
1424*0Sstevel@tonic-gate 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1425*0Sstevel@tonic-gate 			if (aflt->flt_priv || aft_panic)
1426*0Sstevel@tonic-gate 				aflt->flt_panic = 1;
1427*0Sstevel@tonic-gate 		}
1428*0Sstevel@tonic-gate 
1429*0Sstevel@tonic-gate 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1430*0Sstevel@tonic-gate 			if (aflt->flt_priv)
1431*0Sstevel@tonic-gate 				aflt->flt_panic = 1;
1432*0Sstevel@tonic-gate 		}
1433*0Sstevel@tonic-gate 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1434*0Sstevel@tonic-gate 		aflt->flt_panic = 1;
1435*0Sstevel@tonic-gate 	}
1436*0Sstevel@tonic-gate 
1437*0Sstevel@tonic-gate 	/*
1438*0Sstevel@tonic-gate 	 * UE/BERR/TO: Call our bus nexus friends to check for
1439*0Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
1440*0Sstevel@tonic-gate 	 */
1441*0Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1442*0Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
1443*0Sstevel@tonic-gate 	}
1444*0Sstevel@tonic-gate 
1445*0Sstevel@tonic-gate 	/*
1446*0Sstevel@tonic-gate 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1447*0Sstevel@tonic-gate 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1448*0Sstevel@tonic-gate 	 * For Sabre, we will panic on UEs. Attempting diagnostic reads on the
1449*0Sstevel@tonic-gate 	 * caches may introduce more parity errors (especially when the module
1450*0Sstevel@tonic-gate 	 * is bad), and on Sabre there is no guarantee that such errors
1451*0Sstevel@tonic-gate 	 * (if introduced) are written back as poisoned data.
1452*0Sstevel@tonic-gate 	 */
1453*0Sstevel@tonic-gate 	if (t_afsr & P_AFSR_UE) {
1454*0Sstevel@tonic-gate 		int i;
1455*0Sstevel@tonic-gate 
1456*0Sstevel@tonic-gate 		(void) strcat(pr_reason, "UE ");
1457*0Sstevel@tonic-gate 
1458*0Sstevel@tonic-gate 		spf_flt.flt_type = CPU_UE_ERR;
1459*0Sstevel@tonic-gate 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1460*0Sstevel@tonic-gate 			MMU_PAGESHIFT)) ? 1 : 0;
1461*0Sstevel@tonic-gate 
1462*0Sstevel@tonic-gate 		/*
1463*0Sstevel@tonic-gate 		 * With a UE, we have the PA of the fault.
1464*0Sstevel@tonic-gate 		 * Do a diagnostic read to get the ecache
1465*0Sstevel@tonic-gate 		 * data and tag info of the bad line for logging.
1466*0Sstevel@tonic-gate 		 */
1467*0Sstevel@tonic-gate 		if (aflt->flt_in_memory) {
1468*0Sstevel@tonic-gate 			uint32_t ec_set_size;
1469*0Sstevel@tonic-gate 			uchar_t state;
1470*0Sstevel@tonic-gate 			uint32_t ecache_idx;
1471*0Sstevel@tonic-gate 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1472*0Sstevel@tonic-gate 
1473*0Sstevel@tonic-gate 			/* touch the line to put it in ecache */
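			/*
			 * Bracket the load with AFSR captures: pending
			 * errors are accumulated first, and EDP/UE tripped
			 * by our own diagnostic load are masked out of the
			 * second capture.
			 */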
1474*0Sstevel@tonic-gate 			acc_afsr |= read_and_clear_afsr();
1475*0Sstevel@tonic-gate 			(void) lddphys(faultpa);
1476*0Sstevel@tonic-gate 			acc_afsr |= (read_and_clear_afsr() &
1477*0Sstevel@tonic-gate 				    ~(P_AFSR_EDP | P_AFSR_UE));
1478*0Sstevel@tonic-gate 
1479*0Sstevel@tonic-gate 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1480*0Sstevel@tonic-gate 			    ecache_associativity;
1481*0Sstevel@tonic-gate 
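			/*
			 * Probe each way of the E$ for a valid line whose
			 * tag matches the fault PA.
			 */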
1482*0Sstevel@tonic-gate 			for (i = 0; i < ecache_associativity; i++) {
1483*0Sstevel@tonic-gate 				ecache_idx = i * ec_set_size +
1484*0Sstevel@tonic-gate 				    (aflt->flt_addr % ec_set_size);
1485*0Sstevel@tonic-gate 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1486*0Sstevel@tonic-gate 					(uint64_t *)&spf_flt.flt_ec_data[0],
1487*0Sstevel@tonic-gate 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1488*0Sstevel@tonic-gate 				acc_afsr |= oafsr;
1489*0Sstevel@tonic-gate 
1490*0Sstevel@tonic-gate 				state = (uchar_t)((spf_flt.flt_ec_tag &
1491*0Sstevel@tonic-gate 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1492*0Sstevel@tonic-gate 
1493*0Sstevel@tonic-gate 				if ((state & cpu_ec_state_valid) &&
1494*0Sstevel@tonic-gate 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1495*0Sstevel@tonic-gate 				    ((uint64_t)aflt->flt_addr >>
1496*0Sstevel@tonic-gate 				    cpu_ec_tag_shift)))
1497*0Sstevel@tonic-gate 					break;
1498*0Sstevel@tonic-gate 			}
1499*0Sstevel@tonic-gate 
1500*0Sstevel@tonic-gate 			/*
1501*0Sstevel@tonic-gate 			 * Check to see if the ecache tag is valid for the
1502*0Sstevel@tonic-gate 			 * fault PA. In the very unlikely event where the
1503*0Sstevel@tonic-gate 			 * line could be victimized, no ecache info will be
1504*0Sstevel@tonic-gate 			 * available. If this is the case, capture the line
1505*0Sstevel@tonic-gate 			 * from memory instead.
1506*0Sstevel@tonic-gate 			 */
1507*0Sstevel@tonic-gate 			if ((state & cpu_ec_state_valid) == 0 ||
1508*0Sstevel@tonic-gate 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1509*0Sstevel@tonic-gate 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1510*0Sstevel@tonic-gate 				for (i = 0; i < 8; i++, faultpa += 8) {
1511*0Sstevel@tonic-gate 					ec_data_t *ecdptr;
1512*0Sstevel@tonic-gate 
1513*0Sstevel@tonic-gate 					ecdptr = &spf_flt.flt_ec_data[i];
1514*0Sstevel@tonic-gate 					acc_afsr |= read_and_clear_afsr();
1515*0Sstevel@tonic-gate 					ecdptr->ec_d8 = lddphys(faultpa);
1516*0Sstevel@tonic-gate 					acc_afsr |= (read_and_clear_afsr() &
1517*0Sstevel@tonic-gate 						    ~(P_AFSR_EDP | P_AFSR_UE));
1518*0Sstevel@tonic-gate 					ecdptr->ec_afsr = 0; /* null afsr value */
1520*0Sstevel@tonic-gate 				}
1521*0Sstevel@tonic-gate 
1522*0Sstevel@tonic-gate 				/*
1523*0Sstevel@tonic-gate 				 * Mark tag invalid to indicate mem dump
1524*0Sstevel@tonic-gate 				 * when we print out the info.
1525*0Sstevel@tonic-gate 				 */
1526*0Sstevel@tonic-gate 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1527*0Sstevel@tonic-gate 			}
1528*0Sstevel@tonic-gate 			spf_flt.flt_ec_lcnt = 1;
1529*0Sstevel@tonic-gate 
1530*0Sstevel@tonic-gate 			/*
1531*0Sstevel@tonic-gate 			 * Flush out the bad line
1532*0Sstevel@tonic-gate 			 */
1533*0Sstevel@tonic-gate 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1534*0Sstevel@tonic-gate 				cpunodes[CPU->cpu_id].ecache_size);
1535*0Sstevel@tonic-gate 
1536*0Sstevel@tonic-gate 			acc_afsr |= clear_errors(NULL, NULL);
1537*0Sstevel@tonic-gate 		}
1538*0Sstevel@tonic-gate 
1539*0Sstevel@tonic-gate 		/*
1540*0Sstevel@tonic-gate 		 * Ask our bus nexus friends if they have any fatal errors. If
1541*0Sstevel@tonic-gate 		 * so, they will log appropriate error messages and panic as a
1542*0Sstevel@tonic-gate 		 * result. We then queue an event for each UDB that reports a
1543*0Sstevel@tonic-gate 		 * UE. Each UE reported in a UDB will have its own log message.
1544*0Sstevel@tonic-gate 		 *
1545*0Sstevel@tonic-gate 		 * Note from kbn: In the case where there are multiple UEs
1546*0Sstevel@tonic-gate 		 * (ME bit is set) - the AFAR address is only accurate to
1547*0Sstevel@tonic-gate 		 * the 16-byte granularity. One cannot tell whether the AFAR
1548*0Sstevel@tonic-gate 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1549*0Sstevel@tonic-gate 		 * always report the AFAR address to be 16-byte aligned.
1550*0Sstevel@tonic-gate 		 *
1551*0Sstevel@tonic-gate 		 * If we're on a Sabre, there is no SDBL, but it will always
1552*0Sstevel@tonic-gate 		 * read as zero, so the sdbl test below will safely fail.
1553*0Sstevel@tonic-gate 		 */
1554*0Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1555*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1556*0Sstevel@tonic-gate 
1557*0Sstevel@tonic-gate 		if (sdbh & P_DER_UE) {
1558*0Sstevel@tonic-gate 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1559*0Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1560*0Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1561*0Sstevel@tonic-gate 			    aflt->flt_panic);
1562*0Sstevel@tonic-gate 		}
1563*0Sstevel@tonic-gate 		if (sdbl & P_DER_UE) {
1564*0Sstevel@tonic-gate 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1565*0Sstevel@tonic-gate 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
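			/*
			 * With a single UE (ME clear), the AFAR can be
			 * refined to the 8-byte half covered by the UDBL;
			 * with ME set it stays 16-byte aligned (see above).
			 */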
1566*0Sstevel@tonic-gate 			if (!(aflt->flt_stat & P_AFSR_ME))
1567*0Sstevel@tonic-gate 				aflt->flt_addr |= 0x8;
1568*0Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1569*0Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1570*0Sstevel@tonic-gate 			    aflt->flt_panic);
1571*0Sstevel@tonic-gate 		}
1572*0Sstevel@tonic-gate 
1573*0Sstevel@tonic-gate 		/*
1574*0Sstevel@tonic-gate 		 * If we got a UE and are panicking, save the fault PA in a
1575*0Sstevel@tonic-gate 		 * known location so that the platform-specific panic code can
1576*0Sstevel@tonic-gate 		 * check for copyback errors.
1577*0Sstevel@tonic-gate 		 */
1578*0Sstevel@tonic-gate 		if (aflt->flt_panic && aflt->flt_in_memory) {
1579*0Sstevel@tonic-gate 			panic_aflt = *aflt;
1580*0Sstevel@tonic-gate 		}
1581*0Sstevel@tonic-gate 	}
1582*0Sstevel@tonic-gate 
1583*0Sstevel@tonic-gate 	/*
1584*0Sstevel@tonic-gate 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1585*0Sstevel@tonic-gate 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1586*0Sstevel@tonic-gate 	 */
1587*0Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1588*0Sstevel@tonic-gate 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1589*0Sstevel@tonic-gate 
1590*0Sstevel@tonic-gate 		if (t_afsr & P_AFSR_EDP)
1591*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "EDP ");
1592*0Sstevel@tonic-gate 
1593*0Sstevel@tonic-gate 		if (t_afsr & P_AFSR_LDP)
1594*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "LDP ");
1595*0Sstevel@tonic-gate 
1596*0Sstevel@tonic-gate 		/*
1597*0Sstevel@tonic-gate 		 * Here we have no PA to work with.
1598*0Sstevel@tonic-gate 		 * Scan each line in the ecache to look for
1599*0Sstevel@tonic-gate 		 * the one with bad parity.
1600*0Sstevel@tonic-gate 		 */
1601*0Sstevel@tonic-gate 		aflt->flt_addr = AFLT_INV_ADDR;
1602*0Sstevel@tonic-gate 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1603*0Sstevel@tonic-gate 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1604*0Sstevel@tonic-gate 		acc_afsr |= (oafsr & ~P_AFSR_WP);
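		/* WP from the scan is masked; WP proper is handled below */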
1605*0Sstevel@tonic-gate 
1606*0Sstevel@tonic-gate 		/*
1607*0Sstevel@tonic-gate 		 * If we found a bad PA, update the state to indicate if it is
1608*0Sstevel@tonic-gate 		 * memory or I/O space.  This code will be important if we ever
1609*0Sstevel@tonic-gate 		 * support cacheable frame buffers.
1610*0Sstevel@tonic-gate 		 */
1611*0Sstevel@tonic-gate 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1612*0Sstevel@tonic-gate 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1613*0Sstevel@tonic-gate 				MMU_PAGESHIFT)) ? 1 : 0;
1614*0Sstevel@tonic-gate 		}
1615*0Sstevel@tonic-gate 
1616*0Sstevel@tonic-gate 		if (isus2i || isus2e)
1617*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1618*0Sstevel@tonic-gate 
1619*0Sstevel@tonic-gate 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1620*0Sstevel@tonic-gate 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1621*0Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1622*0Sstevel@tonic-gate 		    aflt->flt_panic);
1623*0Sstevel@tonic-gate 	}
1624*0Sstevel@tonic-gate 
1625*0Sstevel@tonic-gate 	/*
1626*0Sstevel@tonic-gate 	 * Timeout and bus error handling.  There are two cases to consider:
1627*0Sstevel@tonic-gate 	 *
1628*0Sstevel@tonic-gate 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
1629*0Sstevel@tonic-gate 	 * have already modified the saved registers so that we will return
1630*0Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1631*0Sstevel@tonic-gate 	 *
1632*0Sstevel@tonic-gate 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1633*0Sstevel@tonic-gate 	 * a SIGBUS.  We do not log the occurrence - processes performing
1634*0Sstevel@tonic-gate 	 * device control would generate lots of uninteresting messages.
1635*0Sstevel@tonic-gate 	 */
1636*0Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1637*0Sstevel@tonic-gate 		if (t_afsr & P_AFSR_TO)
1638*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "BTO ");
1639*0Sstevel@tonic-gate 
1640*0Sstevel@tonic-gate 		if (t_afsr & P_AFSR_BERR)
1641*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "BERR ");
1642*0Sstevel@tonic-gate 
1643*0Sstevel@tonic-gate 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1644*0Sstevel@tonic-gate 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1645*0Sstevel@tonic-gate 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1646*0Sstevel@tonic-gate 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1647*0Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1648*0Sstevel@tonic-gate 			    aflt->flt_panic);
1649*0Sstevel@tonic-gate 		}
1650*0Sstevel@tonic-gate 	}
1651*0Sstevel@tonic-gate 
1652*0Sstevel@tonic-gate 	/*
1653*0Sstevel@tonic-gate 	 * Handle WP: WP happens when the ecache is victimized and a parity
1654*0Sstevel@tonic-gate 	 * error was detected on a writeback.  The data in question will be
1655*0Sstevel@tonic-gate 	 * poisoned as a UE will be written back.  The PA is not logged and
1656*0Sstevel@tonic-gate 	 * it is possible that it doesn't belong to the trapped thread.  The
1657*0Sstevel@tonic-gate 	 * WP trap is not fatal, but it could be fatal to someone that
1658*0Sstevel@tonic-gate 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1659*0Sstevel@tonic-gate 	 * to force the memscrubber to read all of memory when it awakens.
1660*0Sstevel@tonic-gate 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1661*0Sstevel@tonic-gate 	 * UE back to poison the data.
1662*0Sstevel@tonic-gate 	 */
1663*0Sstevel@tonic-gate 	if (t_afsr & P_AFSR_WP) {
1664*0Sstevel@tonic-gate 		(void) strcat(pr_reason, "WP ");
1665*0Sstevel@tonic-gate 		if (isus2i || isus2e) {
1666*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1667*0Sstevel@tonic-gate 		} else {
1668*0Sstevel@tonic-gate 			read_all_memscrub = 1;
1669*0Sstevel@tonic-gate 		}
1670*0Sstevel@tonic-gate 		spf_flt.flt_type = CPU_WP_ERR;
1671*0Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1672*0Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1673*0Sstevel@tonic-gate 		    aflt->flt_panic);
1674*0Sstevel@tonic-gate 	}
1675*0Sstevel@tonic-gate 
1676*0Sstevel@tonic-gate 	/*
1677*0Sstevel@tonic-gate 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1678*0Sstevel@tonic-gate 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1679*0Sstevel@tonic-gate 	 * This is fatal.
1680*0Sstevel@tonic-gate 	 */
1681*0Sstevel@tonic-gate 
1682*0Sstevel@tonic-gate 	if (t_afsr & P_AFSR_CP) {
1683*0Sstevel@tonic-gate 		if (isus2i || isus2e) {
1684*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "CP ");
1685*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1686*0Sstevel@tonic-gate 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1687*0Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1688*0Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1689*0Sstevel@tonic-gate 			    aflt->flt_panic);
1690*0Sstevel@tonic-gate 		} else {
1691*0Sstevel@tonic-gate 			/*
1692*0Sstevel@tonic-gate 			 * Orphan CP: happens when a signal integrity problem
1693*0Sstevel@tonic-gate 			 * on a CPU causes a CP to be reported without its
1694*0Sstevel@tonic-gate 			 * associated UE. This is handled by locating the bad
1695*0Sstevel@tonic-gate 			 * parity line and kicking off the memscrubber to find
1696*0Sstevel@tonic-gate 			 * the UE, in memory or in another CPU's cache.
1697*0Sstevel@tonic-gate 			 */
1698*0Sstevel@tonic-gate 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1699*0Sstevel@tonic-gate 			(void) strcat(pr_reason, "ORPHAN_CP ");
1700*0Sstevel@tonic-gate 
1701*0Sstevel@tonic-gate 			/*
1702*0Sstevel@tonic-gate 			 * Here we have no PA to work with.
1703*0Sstevel@tonic-gate 			 * Scan each line in the ecache to look for
1704*0Sstevel@tonic-gate 			 * the one with bad parity.
1705*0Sstevel@tonic-gate 			 */
1706*0Sstevel@tonic-gate 			aflt->flt_addr = AFLT_INV_ADDR;
1707*0Sstevel@tonic-gate 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1708*0Sstevel@tonic-gate 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1709*0Sstevel@tonic-gate 				&oafsr);
1710*0Sstevel@tonic-gate 			acc_afsr |= oafsr;
1711*0Sstevel@tonic-gate 
1712*0Sstevel@tonic-gate 			/*
1713*0Sstevel@tonic-gate 			 * If we found a bad PA, update the state to indicate
1714*0Sstevel@tonic-gate 			 * if it is memory or I/O space.
1715*0Sstevel@tonic-gate 			 */
1716*0Sstevel@tonic-gate 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1717*0Sstevel@tonic-gate 				aflt->flt_in_memory =
1718*0Sstevel@tonic-gate 					(pf_is_memory(aflt->flt_addr >>
1719*0Sstevel@tonic-gate 						MMU_PAGESHIFT)) ? 1 : 0;
1720*0Sstevel@tonic-gate 			}
1721*0Sstevel@tonic-gate 			read_all_memscrub = 1;
1722*0Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1723*0Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1724*0Sstevel@tonic-gate 			    aflt->flt_panic);
1725*0Sstevel@tonic-gate 
1726*0Sstevel@tonic-gate 		}
1727*0Sstevel@tonic-gate 	}
1728*0Sstevel@tonic-gate 
1729*0Sstevel@tonic-gate 	/*
1730*0Sstevel@tonic-gate 	 * If we queued an error other than WP or CP and we are going to return
1731*0Sstevel@tonic-gate 	 * from the trap and the error was in user mode or inside of a
1732*0Sstevel@tonic-gate 	 * copy routine, set AST flag so the queue will be drained before
1733*0Sstevel@tonic-gate 	 * returning to user mode.
1734*0Sstevel@tonic-gate 	 *
1735*0Sstevel@tonic-gate 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1736*0Sstevel@tonic-gate 	 * and send an event to its process contract.
1737*0Sstevel@tonic-gate 	 *
1738*0Sstevel@tonic-gate 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1739*0Sstevel@tonic-gate 	 * will have been no error queued in this case.
1740*0Sstevel@tonic-gate 	 */
1741*0Sstevel@tonic-gate 	if ((t_afsr &
1742*0Sstevel@tonic-gate 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1743*0Sstevel@tonic-gate 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1744*0Sstevel@tonic-gate 			int pcb_flag = 0;
1745*0Sstevel@tonic-gate 
1746*0Sstevel@tonic-gate 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1747*0Sstevel@tonic-gate 				pcb_flag |= ASYNC_HWERR;
1748*0Sstevel@tonic-gate 
1749*0Sstevel@tonic-gate 			if (t_afsr & P_AFSR_BERR)
1750*0Sstevel@tonic-gate 				pcb_flag |= ASYNC_BERR;
1751*0Sstevel@tonic-gate 
1752*0Sstevel@tonic-gate 			if (t_afsr & P_AFSR_TO)
1753*0Sstevel@tonic-gate 				pcb_flag |= ASYNC_BTO;
1754*0Sstevel@tonic-gate 
1755*0Sstevel@tonic-gate 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1756*0Sstevel@tonic-gate 			aston(curthread);
1757*0Sstevel@tonic-gate 			action = ACTION_AST_FLAGS;
1758*0Sstevel@tonic-gate 	}
1759*0Sstevel@tonic-gate 
1760*0Sstevel@tonic-gate 	/*
1761*0Sstevel@tonic-gate 	 * In response to a deferred error, we must do one of three things:
1762*0Sstevel@tonic-gate 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1763*0Sstevel@tonic-gate 	 * set in cases (1) and (2) - check that either action is set or
1764*0Sstevel@tonic-gate 	 * (3) is true.
1765*0Sstevel@tonic-gate 	 *
1766*0Sstevel@tonic-gate 	 * On II, the WP writes poisoned data back to memory, which will
1767*0Sstevel@tonic-gate 	 * cause a UE and a panic or reboot when read.  In this case, we
1768*0Sstevel@tonic-gate 	 * don't need to panic at this time.  On IIi and IIe,
1769*0Sstevel@tonic-gate 	 * aflt->flt_panic is already set above.
1770*0Sstevel@tonic-gate 	 */
1771*0Sstevel@tonic-gate 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1772*0Sstevel@tonic-gate 	    (t_afsr & P_AFSR_WP));
1773*0Sstevel@tonic-gate 
1774*0Sstevel@tonic-gate 	/*
1775*0Sstevel@tonic-gate 	 * Make a final sanity check to make sure we did not get any more async
1776*0Sstevel@tonic-gate 	 * errors and accumulate the afsr.
1777*0Sstevel@tonic-gate 	 */
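	/* Flushing twice the E$ size displaces every line from the E$. */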
1778*0Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1779*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
1780*0Sstevel@tonic-gate 	(void) clear_errors(&spf_flt, NULL);
1781*0Sstevel@tonic-gate 
1782*0Sstevel@tonic-gate 	/*
1783*0Sstevel@tonic-gate 	 * Take care of a special case: If there is a UE in the ecache flush
1784*0Sstevel@tonic-gate 	 * area, we'll see it in flush_ecache().  This will trigger the
1785*0Sstevel@tonic-gate 	 * CPU_ADDITIONAL_ERRORS case below.
1786*0Sstevel@tonic-gate 	 *
1787*0Sstevel@tonic-gate 	 * This could occur if the original error was a UE in the flush area,
1788*0Sstevel@tonic-gate 	 * or if the original error was an E$ error that was flushed out of
1789*0Sstevel@tonic-gate 	 * the E$ in scan_ecache().
1790*0Sstevel@tonic-gate 	 *
1791*0Sstevel@tonic-gate 	 * If it's at the same address that we're already logging, then it's
1792*0Sstevel@tonic-gate 	 * probably one of these cases.  Clear the bit so we don't trip over
1793*0Sstevel@tonic-gate 	 * it on the additional errors case, which could cause an unnecessary
1794*0Sstevel@tonic-gate 	 * panic.
1795*0Sstevel@tonic-gate 	 */
1796*0Sstevel@tonic-gate 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1797*0Sstevel@tonic-gate 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1798*0Sstevel@tonic-gate 	else
1799*0Sstevel@tonic-gate 		acc_afsr |= aflt->flt_stat;
1800*0Sstevel@tonic-gate 
1801*0Sstevel@tonic-gate 	/*
1802*0Sstevel@tonic-gate 	 * Check the accumulated afsr for the important bits.
1803*0Sstevel@tonic-gate 	 * Make sure the spf_flt.flt_type value is set, and
1804*0Sstevel@tonic-gate 	 * enqueue an error.
1805*0Sstevel@tonic-gate 	 */
1806*0Sstevel@tonic-gate 	if (acc_afsr &
1807*0Sstevel@tonic-gate 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1808*0Sstevel@tonic-gate 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1809*0Sstevel@tonic-gate 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1810*0Sstevel@tonic-gate 		    P_AFSR_ISAP))
1811*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
1812*0Sstevel@tonic-gate 
1813*0Sstevel@tonic-gate 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1814*0Sstevel@tonic-gate 		aflt->flt_stat = acc_afsr;
1815*0Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1816*0Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1817*0Sstevel@tonic-gate 		    aflt->flt_panic);
1818*0Sstevel@tonic-gate 	}
1819*0Sstevel@tonic-gate 
1820*0Sstevel@tonic-gate 	/*
1821*0Sstevel@tonic-gate 	 * If aflt->flt_panic is set at this point, we need to panic as the
1822*0Sstevel@tonic-gate 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1823*0Sstevel@tonic-gate 	 * We've already enqueued the error in one of the if-clauses above,
1824*0Sstevel@tonic-gate 	 * and it will be dequeued and logged as part of the panic flow.
1825*0Sstevel@tonic-gate 	 */
1826*0Sstevel@tonic-gate 	if (aflt->flt_panic) {
1827*0Sstevel@tonic-gate 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1828*0Sstevel@tonic-gate 		    "See previous message(s) for details", " %sError(s)",
1829*0Sstevel@tonic-gate 		    pr_reason);
1830*0Sstevel@tonic-gate 	}
1831*0Sstevel@tonic-gate 
1832*0Sstevel@tonic-gate 	/*
1833*0Sstevel@tonic-gate 	 * Before returning, we must re-enable errors, and
1834*0Sstevel@tonic-gate 	 * reset the caches to their boot-up state.
1835*0Sstevel@tonic-gate 	 */
1836*0Sstevel@tonic-gate 	set_lsu(get_lsu() | cache_boot_state);
1837*0Sstevel@tonic-gate 	set_error_enable(EER_ENABLE);
1838*0Sstevel@tonic-gate }
1839*0Sstevel@tonic-gate 
1840*0Sstevel@tonic-gate /*
1841*0Sstevel@tonic-gate  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1842*0Sstevel@tonic-gate  * This routine is shared by the CE and UE handling code.
1843*0Sstevel@tonic-gate  */
1844*0Sstevel@tonic-gate static void
1845*0Sstevel@tonic-gate check_misc_err(spitf_async_flt *spf_flt)
1846*0Sstevel@tonic-gate {
1847*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1848*0Sstevel@tonic-gate 	char *fatal_str = NULL;
1849*0Sstevel@tonic-gate 
1850*0Sstevel@tonic-gate 	/*
1851*0Sstevel@tonic-gate 	 * The ISAP and ETP errors are supposed to cause a POR
1852*0Sstevel@tonic-gate 	 * from the system, so in theory we never, ever see these messages.
1853*0Sstevel@tonic-gate 	 * ISAP, ETP and IVUE are considered to be fatal.
1854*0Sstevel@tonic-gate 	 */
1855*0Sstevel@tonic-gate 	if (aflt->flt_stat & P_AFSR_ISAP)
1856*0Sstevel@tonic-gate 		fatal_str = " System Address Parity Error on";
1857*0Sstevel@tonic-gate 	else if (aflt->flt_stat & P_AFSR_ETP)
1858*0Sstevel@tonic-gate 		fatal_str = " Ecache Tag Parity Error on";
1859*0Sstevel@tonic-gate 	else if (aflt->flt_stat & P_AFSR_IVUE)
1860*0Sstevel@tonic-gate 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1861*0Sstevel@tonic-gate 	if (fatal_str != NULL) {
1862*0Sstevel@tonic-gate 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1863*0Sstevel@tonic-gate 			NULL, fatal_str);
1864*0Sstevel@tonic-gate 	}
1865*0Sstevel@tonic-gate }
1866*0Sstevel@tonic-gate 
1867*0Sstevel@tonic-gate /*
1868*0Sstevel@tonic-gate  * Routine to convert a syndrome into a syndrome code.
1869*0Sstevel@tonic-gate  */
1870*0Sstevel@tonic-gate static int
1871*0Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd)
1872*0Sstevel@tonic-gate {
1873*0Sstevel@tonic-gate 	if (synd_status != AFLT_STAT_VALID)
1874*0Sstevel@tonic-gate 		return (-1);
1875*0Sstevel@tonic-gate 
1876*0Sstevel@tonic-gate 	/*
1877*0Sstevel@tonic-gate 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1878*0Sstevel@tonic-gate 	 * to get the code indicating which bit(s) is(are) bad.
1879*0Sstevel@tonic-gate 	 */
1880*0Sstevel@tonic-gate 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1881*0Sstevel@tonic-gate 		return (-1);
1882*0Sstevel@tonic-gate 	else
1883*0Sstevel@tonic-gate 		return (ecc_syndrome_tab[synd]);
1884*0Sstevel@tonic-gate }
1885*0Sstevel@tonic-gate 
1886*0Sstevel@tonic-gate /*
1887*0Sstevel@tonic-gate  * Routine to return a string identifying the physical name
1888*0Sstevel@tonic-gate  * associated with a memory/cache error.
1889*0Sstevel@tonic-gate  */
1890*0Sstevel@tonic-gate /* ARGSUSED */
1891*0Sstevel@tonic-gate int
1892*0Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1893*0Sstevel@tonic-gate     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1894*0Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
1895*0Sstevel@tonic-gate {
1896*0Sstevel@tonic-gate 	short synd_code;
1897*0Sstevel@tonic-gate 	int ret;
1898*0Sstevel@tonic-gate 
1899*0Sstevel@tonic-gate 	if (flt_in_memory) {
1900*0Sstevel@tonic-gate 		synd_code = synd_to_synd_code(synd_status, synd);
1901*0Sstevel@tonic-gate 		if (synd_code == -1) {
1902*0Sstevel@tonic-gate 			ret = EINVAL;
1903*0Sstevel@tonic-gate 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1904*0Sstevel@tonic-gate 		    buf, buflen, lenp) != 0) {
1905*0Sstevel@tonic-gate 			ret = EIO;
1906*0Sstevel@tonic-gate 		} else if (*lenp <= 1) {
1907*0Sstevel@tonic-gate 			ret = EINVAL;
1908*0Sstevel@tonic-gate 		} else {
1909*0Sstevel@tonic-gate 			ret = 0;
1910*0Sstevel@tonic-gate 		}
1911*0Sstevel@tonic-gate 	} else {
1912*0Sstevel@tonic-gate 		ret = ENOTSUP;
1913*0Sstevel@tonic-gate 	}
1914*0Sstevel@tonic-gate 
1915*0Sstevel@tonic-gate 	if (ret != 0) {
1916*0Sstevel@tonic-gate 		buf[0] = '\0';
1917*0Sstevel@tonic-gate 		*lenp = 0;
1918*0Sstevel@tonic-gate 	}
1919*0Sstevel@tonic-gate 
1920*0Sstevel@tonic-gate 	return (ret);
1921*0Sstevel@tonic-gate }
1922*0Sstevel@tonic-gate 
1923*0Sstevel@tonic-gate /*
1924*0Sstevel@tonic-gate  * Wrapper for cpu_get_mem_unum() routine that takes an
1925*0Sstevel@tonic-gate  * async_flt struct rather than explicit arguments.
1926*0Sstevel@tonic-gate  */
1927*0Sstevel@tonic-gate int
1928*0Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1929*0Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
1930*0Sstevel@tonic-gate {
1931*0Sstevel@tonic-gate 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1932*0Sstevel@tonic-gate 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1933*0Sstevel@tonic-gate 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1934*0Sstevel@tonic-gate }
1935*0Sstevel@tonic-gate 
1936*0Sstevel@tonic-gate /*
1937*0Sstevel@tonic-gate  * This routine is a more generic interface to cpu_get_mem_unum(),
1938*0Sstevel@tonic-gate  * that may be used by other modules (e.g. mm).
1939*0Sstevel@tonic-gate  */
1940*0Sstevel@tonic-gate int
1941*0Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1942*0Sstevel@tonic-gate 		char *buf, int buflen, int *lenp)
1943*0Sstevel@tonic-gate {
1944*0Sstevel@tonic-gate 	int synd_status, flt_in_memory, ret;
1945*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
1946*0Sstevel@tonic-gate 
1947*0Sstevel@tonic-gate 	/*
1948*0Sstevel@tonic-gate 	 * Check for an invalid address.
1949*0Sstevel@tonic-gate 	 */
1950*0Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
1951*0Sstevel@tonic-gate 		return (ENXIO);
1952*0Sstevel@tonic-gate 
1953*0Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
1954*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
1955*0Sstevel@tonic-gate 	else
1956*0Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
1957*0Sstevel@tonic-gate 
1958*0Sstevel@tonic-gate 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1959*0Sstevel@tonic-gate 
1960*0Sstevel@tonic-gate 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1961*0Sstevel@tonic-gate 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1962*0Sstevel@tonic-gate 	    != 0)
1963*0Sstevel@tonic-gate 		return (ret);
1964*0Sstevel@tonic-gate 
1965*0Sstevel@tonic-gate 	if (*lenp >= buflen)
1966*0Sstevel@tonic-gate 		return (ENAMETOOLONG);
1967*0Sstevel@tonic-gate 
1968*0Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
1969*0Sstevel@tonic-gate 
1970*0Sstevel@tonic-gate 	return (0);
1971*0Sstevel@tonic-gate }
1972*0Sstevel@tonic-gate 
1973*0Sstevel@tonic-gate /*
1974*0Sstevel@tonic-gate  * Routine to return memory information associated
1975*0Sstevel@tonic-gate  * with a physical address and syndrome.
1976*0Sstevel@tonic-gate  */
1977*0Sstevel@tonic-gate /* ARGSUSED */
1978*0Sstevel@tonic-gate int
1979*0Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar,
1980*0Sstevel@tonic-gate     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1981*0Sstevel@tonic-gate     int *segsp, int *banksp, int *mcidp)
1982*0Sstevel@tonic-gate {
1983*0Sstevel@tonic-gate 	return (ENOTSUP);
1984*0Sstevel@tonic-gate }
1985*0Sstevel@tonic-gate 
1986*0Sstevel@tonic-gate /*
1987*0Sstevel@tonic-gate  * Routine to return a string identifying the physical
1988*0Sstevel@tonic-gate  * name associated with a cpuid.
1989*0Sstevel@tonic-gate  */
1990*0Sstevel@tonic-gate /* ARGSUSED */
1991*0Sstevel@tonic-gate int
1992*0Sstevel@tonic-gate cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1993*0Sstevel@tonic-gate {
1994*0Sstevel@tonic-gate 	return (ENOTSUP);
1995*0Sstevel@tonic-gate }
1996*0Sstevel@tonic-gate 
1997*0Sstevel@tonic-gate /*
1998*0Sstevel@tonic-gate  * This routine returns the size of the kernel's FRU name buffer.
1999*0Sstevel@tonic-gate  */
2000*0Sstevel@tonic-gate size_t
2001*0Sstevel@tonic-gate cpu_get_name_bufsize()
2002*0Sstevel@tonic-gate {
2003*0Sstevel@tonic-gate 	return (UNUM_NAMLEN);
2004*0Sstevel@tonic-gate }
2005*0Sstevel@tonic-gate 
2006*0Sstevel@tonic-gate /*
2007*0Sstevel@tonic-gate  * Cpu specific log func for UEs.
2008*0Sstevel@tonic-gate  */
2009*0Sstevel@tonic-gate static void
2010*0Sstevel@tonic-gate log_ue_err(struct async_flt *aflt, char *unum)
2011*0Sstevel@tonic-gate {
2012*0Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2013*0Sstevel@tonic-gate 	int len = 0;
2014*0Sstevel@tonic-gate 
2015*0Sstevel@tonic-gate #ifdef DEBUG
2016*0Sstevel@tonic-gate 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2017*0Sstevel@tonic-gate 
2018*0Sstevel@tonic-gate 	/*
2019*0Sstevel@tonic-gate 	 * Paranoid Check for priv mismatch
2020*0Sstevel@tonic-gate 	 * Only applicable for UEs
2021*0Sstevel@tonic-gate 	 */
2022*0Sstevel@tonic-gate 	if (afsr_priv != aflt->flt_priv) {
2023*0Sstevel@tonic-gate 		/*
2024*0Sstevel@tonic-gate 		 * The priv bits in %tstate and %afsr did not match; we expect
2025*0Sstevel@tonic-gate 		 * this to be very rare, so flag it with a message.
2026*0Sstevel@tonic-gate 		 */
2027*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2028*0Sstevel@tonic-gate 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2029*0Sstevel@tonic-gate 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2030*0Sstevel@tonic-gate 
2031*0Sstevel@tonic-gate 		/* update saved afsr to reflect the correct priv */
2032*0Sstevel@tonic-gate 		aflt->flt_stat &= ~P_AFSR_PRIV;
2033*0Sstevel@tonic-gate 		if (aflt->flt_priv)
2034*0Sstevel@tonic-gate 			aflt->flt_stat |= P_AFSR_PRIV;
2035*0Sstevel@tonic-gate 	}
2036*0Sstevel@tonic-gate #endif /* DEBUG */
2037*0Sstevel@tonic-gate 
2038*0Sstevel@tonic-gate 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2039*0Sstevel@tonic-gate 	    UNUM_NAMLEN, &len);
2040*0Sstevel@tonic-gate 
2041*0Sstevel@tonic-gate 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2042*0Sstevel@tonic-gate 	    " Uncorrectable Memory Error on");
2043*0Sstevel@tonic-gate 
2044*0Sstevel@tonic-gate 	if (SYND(aflt->flt_synd) == 0x3) {
2045*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2046*0Sstevel@tonic-gate 		    " Syndrome 0x3 indicates that this may not be a "
2047*0Sstevel@tonic-gate 		    "memory module problem");
2048*0Sstevel@tonic-gate 	}
2049*0Sstevel@tonic-gate 
2050*0Sstevel@tonic-gate 	if (aflt->flt_in_memory)
2051*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2052*0Sstevel@tonic-gate }
2053*0Sstevel@tonic-gate 
2054*0Sstevel@tonic-gate 
2055*0Sstevel@tonic-gate /*
2056*0Sstevel@tonic-gate  * The cpu_async_log_err() function is called via the ue_drain() function to
2057*0Sstevel@tonic-gate  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2058*0Sstevel@tonic-gate  * from softint context, from AST processing in the trap() flow, or from the
2059*0Sstevel@tonic-gate  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2060*0Sstevel@tonic-gate  */
2061*0Sstevel@tonic-gate static void
2062*0Sstevel@tonic-gate cpu_async_log_err(void *flt)
2063*0Sstevel@tonic-gate {
2064*0Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2065*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)flt;
2066*0Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
2067*0Sstevel@tonic-gate 	char *space;
2068*0Sstevel@tonic-gate 	char *ecache_scrub_logstr = NULL;
2069*0Sstevel@tonic-gate 
2070*0Sstevel@tonic-gate 	switch (spf_flt->flt_type) {
2071*0Sstevel@tonic-gate 	    case CPU_UE_ERR:
2072*0Sstevel@tonic-gate 		/*
2073*0Sstevel@tonic-gate 		 * We want to skip logging only if ALL the following
2074*0Sstevel@tonic-gate 		 * conditions are true:
2075*0Sstevel@tonic-gate 		 *
2076*0Sstevel@tonic-gate 		 *	1. We are not panicking
2077*0Sstevel@tonic-gate 		 *	2. There is only one error
2078*0Sstevel@tonic-gate 		 *	3. That error is a memory error
2079*0Sstevel@tonic-gate 		 *	4. The error is caused by the memory scrubber (in
2080*0Sstevel@tonic-gate 		 *	   which case the error will have occurred under
2081*0Sstevel@tonic-gate 		 *	   on_trap protection)
2082*0Sstevel@tonic-gate 		 *	5. The error is on a retired page
2083*0Sstevel@tonic-gate 		 *
2084*0Sstevel@tonic-gate 		 * Note 1: AFLT_PROT_EC is used in places other than the memory
2085*0Sstevel@tonic-gate 		 * scrubber.  However, none of those errors should occur
2086*0Sstevel@tonic-gate 		 * on a retired page.
2087*0Sstevel@tonic-gate 		 *
2088*0Sstevel@tonic-gate 		 * Note 2: In the CE case, these errors are discarded before
2089*0Sstevel@tonic-gate 		 * the errorq.  In the UE case, we must wait until now --
2090*0Sstevel@tonic-gate 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2091*0Sstevel@tonic-gate 		 */
2092*0Sstevel@tonic-gate 		if (!panicstr &&
2093*0Sstevel@tonic-gate 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2094*0Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
2095*0Sstevel@tonic-gate 			page_t *pp = page_numtopp_nolock((pfn_t)
2096*0Sstevel@tonic-gate 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2097*0Sstevel@tonic-gate 
2098*0Sstevel@tonic-gate 			if (pp != NULL && page_isretired(pp)) {
2099*0Sstevel@tonic-gate 
2100*0Sstevel@tonic-gate 				/* Zero the address to clear the error */
2101*0Sstevel@tonic-gate 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2102*0Sstevel@tonic-gate 				return;
2103*0Sstevel@tonic-gate 			}
2104*0Sstevel@tonic-gate 		}
2105*0Sstevel@tonic-gate 
2106*0Sstevel@tonic-gate 		/*
2107*0Sstevel@tonic-gate 		 * Log the UE and check for causes of this UE error that
2108*0Sstevel@tonic-gate 		 * don't cause a trap (Copyback error).  cpu_async_error()
2109*0Sstevel@tonic-gate 		 * has already checked the i/o buses for us.
2110*0Sstevel@tonic-gate 		 */
2111*0Sstevel@tonic-gate 		log_ue_err(aflt, unum);
2112*0Sstevel@tonic-gate 		if (aflt->flt_in_memory)
2113*0Sstevel@tonic-gate 			cpu_check_allcpus(aflt);
2114*0Sstevel@tonic-gate 		break;
2115*0Sstevel@tonic-gate 
2116*0Sstevel@tonic-gate 	    case CPU_EDP_LDP_ERR:
2117*0Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_EDP)
2118*0Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2119*0Sstevel@tonic-gate 			    NULL, " EDP event on");
2120*0Sstevel@tonic-gate 
2121*0Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_LDP)
2122*0Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2123*0Sstevel@tonic-gate 			    NULL, " LDP event on");
2124*0Sstevel@tonic-gate 
2125*0Sstevel@tonic-gate 		/* Log ecache info if it exists */
2126*0Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0) {
2127*0Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
2128*0Sstevel@tonic-gate 
2129*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2130*0Sstevel@tonic-gate 			    NULL, " AFAR was derived from E$Tag");
2131*0Sstevel@tonic-gate 		} else {
2132*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2133*0Sstevel@tonic-gate 			    NULL, " No error found in ecache (No fault "
2134*0Sstevel@tonic-gate 			    "PA available)");
2135*0Sstevel@tonic-gate 		}
2136*0Sstevel@tonic-gate 		break;
2137*0Sstevel@tonic-gate 
2138*0Sstevel@tonic-gate 	    case CPU_WP_ERR:
2139*0Sstevel@tonic-gate 		/*
2140*0Sstevel@tonic-gate 		 * If the memscrub thread hasn't yet read
2141*0Sstevel@tonic-gate 		 * all of memory, as we requested in the
2142*0Sstevel@tonic-gate 		 * trap handler, then give it a kick to
2143*0Sstevel@tonic-gate 		 * make sure it does.
2144*0Sstevel@tonic-gate 		 */
2145*0Sstevel@tonic-gate 		if (!isus2i && !isus2e && read_all_memscrub)
2146*0Sstevel@tonic-gate 			memscrub_run();
2147*0Sstevel@tonic-gate 
2148*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2149*0Sstevel@tonic-gate 		    " WP event on");
2150*0Sstevel@tonic-gate 		return;
2151*0Sstevel@tonic-gate 
2152*0Sstevel@tonic-gate 	    case CPU_BTO_BERR_ERR:
2153*0Sstevel@tonic-gate 		/*
2154*0Sstevel@tonic-gate 		 * A bus timeout or bus error occurred in user mode or
2155*0Sstevel@tonic-gate 		 * outside a protected kernel code region.
2156*0Sstevel@tonic-gate 		 */
2157*0Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_BERR) {
2158*0Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2159*0Sstevel@tonic-gate 			    spf_flt, BERRTO_LFLAGS, NULL,
2160*0Sstevel@tonic-gate 			    " Bus Error on System Bus in %s mode from",
2161*0Sstevel@tonic-gate 			    aflt->flt_priv ? "privileged" : "user");
2162*0Sstevel@tonic-gate 		}
2163*0Sstevel@tonic-gate 
2164*0Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_TO) {
2165*0Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2166*0Sstevel@tonic-gate 			    spf_flt, BERRTO_LFLAGS, NULL,
2167*0Sstevel@tonic-gate 			    " Timeout on System Bus in %s mode from",
2168*0Sstevel@tonic-gate 			    aflt->flt_priv ? "privileged" : "user");
2169*0Sstevel@tonic-gate 		}
2170*0Sstevel@tonic-gate 
2171*0Sstevel@tonic-gate 		return;
2172*0Sstevel@tonic-gate 
2173*0Sstevel@tonic-gate 	    case CPU_PANIC_CP_ERR:
2174*0Sstevel@tonic-gate 		/*
2175*0Sstevel@tonic-gate 		 * Process the Copyback (CP) error info (if any) obtained from
2176*0Sstevel@tonic-gate 		 * polling all the cpus in the panic flow. This case is only
2177*0Sstevel@tonic-gate 		 * entered if we are panicking.
2178*0Sstevel@tonic-gate 		 */
2179*0Sstevel@tonic-gate 		ASSERT(panicstr != NULL);
2180*0Sstevel@tonic-gate 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2181*0Sstevel@tonic-gate 
2182*0Sstevel@tonic-gate 		/* See which space - this info may not exist */
2183*0Sstevel@tonic-gate 		if (panic_aflt.flt_status & ECC_D_TRAP)
2184*0Sstevel@tonic-gate 			space = "Data ";
2185*0Sstevel@tonic-gate 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2186*0Sstevel@tonic-gate 			space = "Instruction ";
2187*0Sstevel@tonic-gate 		else
2188*0Sstevel@tonic-gate 			space = "";
2189*0Sstevel@tonic-gate 
2190*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2191*0Sstevel@tonic-gate 		    " AFAR was derived from UE report,"
2192*0Sstevel@tonic-gate 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2193*0Sstevel@tonic-gate 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2194*0Sstevel@tonic-gate 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2195*0Sstevel@tonic-gate 
2196*0Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0)
2197*0Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
2198*0Sstevel@tonic-gate 		else
2199*0Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2200*0Sstevel@tonic-gate 			    NULL, " No cache dump available");
2201*0Sstevel@tonic-gate 
2202*0Sstevel@tonic-gate 		return;
2203*0Sstevel@tonic-gate 
2204*0Sstevel@tonic-gate 	    case CPU_TRAPPING_CP_ERR:
2205*0Sstevel@tonic-gate 		/*
2206*0Sstevel@tonic-gate 		 * For sabre only.  This is a copyback ecache parity error due
2207*0Sstevel@tonic-gate 		 * to a PCI DMA read.  We should be panicking if we get here.
2208*0Sstevel@tonic-gate 		 */
2209*0Sstevel@tonic-gate 		ASSERT(panicstr != NULL);
2210*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2211*0Sstevel@tonic-gate 		    " AFAR was derived from UE report,"
2212*0Sstevel@tonic-gate 		    " CP event on CPU%d (caused Data access error "
2213*0Sstevel@tonic-gate 		    "on PCIBus)", aflt->flt_inst);
2214*0Sstevel@tonic-gate 		return;
2215*0Sstevel@tonic-gate 
2216*0Sstevel@tonic-gate 		/*
2217*0Sstevel@tonic-gate 		 * We log the ecache lines in the following states:
2218*0Sstevel@tonic-gate 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2219*0Sstevel@tonic-gate 		 * dirty_bad_busy, if ecache_scrub_verbose is set.  If
2220*0Sstevel@tonic-gate 		 * ecache_scrub_panic is set, we panic in addition to logging.
2221*0Sstevel@tonic-gate 		 */
2222*0Sstevel@tonic-gate 	    case CPU_BADLINE_CI_ERR:
2223*0Sstevel@tonic-gate 		ecache_scrub_logstr = "CBI";
2224*0Sstevel@tonic-gate 		/* FALLTHRU */
2225*0Sstevel@tonic-gate 
2226*0Sstevel@tonic-gate 	    case CPU_BADLINE_CB_ERR:
2227*0Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
2228*0Sstevel@tonic-gate 			ecache_scrub_logstr = "CBB";
2229*0Sstevel@tonic-gate 		/* FALLTHRU */
2230*0Sstevel@tonic-gate 
2231*0Sstevel@tonic-gate 	    case CPU_BADLINE_DI_ERR:
2232*0Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
2233*0Sstevel@tonic-gate 			ecache_scrub_logstr = "DBI";
2234*0Sstevel@tonic-gate 		/* FALLTHRU */
2235*0Sstevel@tonic-gate 
2236*0Sstevel@tonic-gate 	    case CPU_BADLINE_DB_ERR:
2237*0Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
2238*0Sstevel@tonic-gate 			ecache_scrub_logstr = "DBB";
2239*0Sstevel@tonic-gate 
2240*0Sstevel@tonic-gate 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2241*0Sstevel@tonic-gate 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2242*0Sstevel@tonic-gate 			" %s event on", ecache_scrub_logstr);
2243*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2244*0Sstevel@tonic-gate 
2245*0Sstevel@tonic-gate 		return;
2246*0Sstevel@tonic-gate 
2247*0Sstevel@tonic-gate 	    case CPU_ORPHAN_CP_ERR:
2248*0Sstevel@tonic-gate 		/*
2249*0Sstevel@tonic-gate 		 * Orphan CP: the CP bit is set, but no CPU has
2250*0Sstevel@tonic-gate 		 * reported a UE.
2251*0Sstevel@tonic-gate 		 */
2252*0Sstevel@tonic-gate 		if (read_all_memscrub)
2253*0Sstevel@tonic-gate 			memscrub_run();
2254*0Sstevel@tonic-gate 
2255*0Sstevel@tonic-gate 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2256*0Sstevel@tonic-gate 			NULL, " Orphan CP event on");
2257*0Sstevel@tonic-gate 
2258*0Sstevel@tonic-gate 		/* Log ecache info if it exists */
2259*0Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0)
2260*0Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
2261*0Sstevel@tonic-gate 		else
2262*0Sstevel@tonic-gate 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2263*0Sstevel@tonic-gate 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2264*0Sstevel@tonic-gate 				" No error found in ecache (No fault "
2265*0Sstevel@tonic-gate 				"PA available)");
2266*0Sstevel@tonic-gate 		return;
2267*0Sstevel@tonic-gate 
2268*0Sstevel@tonic-gate 	    case CPU_ECACHE_ADDR_PAR_ERR:
2269*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270*0Sstevel@tonic-gate 				" E$ Tag Address Parity error on");
2271*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2272*0Sstevel@tonic-gate 		return;
2273*0Sstevel@tonic-gate 
2274*0Sstevel@tonic-gate 	    case CPU_ECACHE_STATE_ERR:
2275*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276*0Sstevel@tonic-gate 				" E$ Tag State Parity error on");
2277*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2278*0Sstevel@tonic-gate 		return;
2279*0Sstevel@tonic-gate 
2280*0Sstevel@tonic-gate 	    case CPU_ECACHE_TAG_ERR:
2281*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2282*0Sstevel@tonic-gate 				" E$ Tag scrub event on");
2283*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2284*0Sstevel@tonic-gate 		return;
2285*0Sstevel@tonic-gate 
2286*0Sstevel@tonic-gate 	    case CPU_ECACHE_ETP_ETS_ERR:
2287*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2288*0Sstevel@tonic-gate 				" AFSR.ETP is set and AFSR.ETS is zero on");
2289*0Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
2290*0Sstevel@tonic-gate 		return;
2291*0Sstevel@tonic-gate 
2292*0Sstevel@tonic-gate 
2293*0Sstevel@tonic-gate 	    case CPU_ADDITIONAL_ERR:
2294*0Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2295*0Sstevel@tonic-gate 		    " Additional errors detected during error processing on");
2296*0Sstevel@tonic-gate 		return;
2297*0Sstevel@tonic-gate 
2298*0Sstevel@tonic-gate 	    default:
2299*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2300*0Sstevel@tonic-gate 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2301*0Sstevel@tonic-gate 		return;
2302*0Sstevel@tonic-gate 	}
2303*0Sstevel@tonic-gate 
2304*0Sstevel@tonic-gate 	/* ... fall through from the UE, EDP, or LDP cases */
2305*0Sstevel@tonic-gate 
2306*0Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2307*0Sstevel@tonic-gate 		if (!panicstr) {
2308*0Sstevel@tonic-gate 			/*
2309*0Sstevel@tonic-gate 			 * Retire the bad page that caused the error
2310*0Sstevel@tonic-gate 			 */
2311*0Sstevel@tonic-gate 			page_t *pp = page_numtopp_nolock((pfn_t)
2312*0Sstevel@tonic-gate 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2313*0Sstevel@tonic-gate 
2314*0Sstevel@tonic-gate 			if (pp != NULL) {
2315*0Sstevel@tonic-gate 				page_settoxic(pp, PAGE_IS_FAULTY);
2316*0Sstevel@tonic-gate 				(void) page_retire(pp, PAGE_IS_TOXIC);
2317*0Sstevel@tonic-gate 			} else {
2318*0Sstevel@tonic-gate 				uint64_t pa =
2319*0Sstevel@tonic-gate 				    P2ALIGN(aflt->flt_addr, MMU_PAGESIZE);
2320*0Sstevel@tonic-gate 
2321*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 3, spf_flt,
2322*0Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
2323*0Sstevel@tonic-gate 				    ": cannot schedule clearing of error on "
2324*0Sstevel@tonic-gate 				    "page 0x%08x.%08x; page not in VM system",
2325*0Sstevel@tonic-gate 				    (uint32_t)(pa >> 32), (uint32_t)pa);
2326*0Sstevel@tonic-gate 			}
2327*0Sstevel@tonic-gate 		} else {
2328*0Sstevel@tonic-gate 			/*
2329*0Sstevel@tonic-gate 			 * Clear UEs on panic so that we don't
2330*0Sstevel@tonic-gate 			 * get haunted by them during panic or
2331*0Sstevel@tonic-gate 			 * after reboot
2332*0Sstevel@tonic-gate 			 */
2333*0Sstevel@tonic-gate 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2334*0Sstevel@tonic-gate 			    cpunodes[CPU->cpu_id].ecache_size,
2335*0Sstevel@tonic-gate 			    cpunodes[CPU->cpu_id].ecache_linesize);
2336*0Sstevel@tonic-gate 
2337*0Sstevel@tonic-gate 			(void) clear_errors(NULL, NULL);
2338*0Sstevel@tonic-gate 		}
2339*0Sstevel@tonic-gate 	}
2340*0Sstevel@tonic-gate 
2341*0Sstevel@tonic-gate 	/*
2342*0Sstevel@tonic-gate 	 * Log final recovery message
2343*0Sstevel@tonic-gate 	 */
2344*0Sstevel@tonic-gate 	if (!panicstr) {
2345*0Sstevel@tonic-gate 		if (!aflt->flt_priv) {
2346*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2347*0Sstevel@tonic-gate 			    NULL, " Above Error is in User Mode"
2348*0Sstevel@tonic-gate 			    "\n    and is fatal: "
2349*0Sstevel@tonic-gate 			    "will SIGKILL process and notify contract");
2350*0Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2351*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2352*0Sstevel@tonic-gate 			    NULL, " Above Error detected while dumping core;"
2353*0Sstevel@tonic-gate 			    "\n    core file will be truncated");
2354*0Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2355*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2356*0Sstevel@tonic-gate 			    NULL, " Above Error is due to Kernel access"
2357*0Sstevel@tonic-gate 			    "\n    to User space and is fatal: "
2358*0Sstevel@tonic-gate 			    "will SIGKILL process and notify contract");
2359*0Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2360*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2361*0Sstevel@tonic-gate 			    " Above Error detected by protected Kernel code"
2362*0Sstevel@tonic-gate 			    "\n    that will try to clear error from system");
2363*0Sstevel@tonic-gate 		}
2364*0Sstevel@tonic-gate 	}
2365*0Sstevel@tonic-gate }
2366*0Sstevel@tonic-gate 
2367*0Sstevel@tonic-gate 
2368*0Sstevel@tonic-gate /*
2369*0Sstevel@tonic-gate  * Check all cpus for non-trapping UE-causing errors
2370*0Sstevel@tonic-gate  * In Ultra I/II, we look for copyback errors (CPs)
2371*0Sstevel@tonic-gate  */
2372*0Sstevel@tonic-gate void
2373*0Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt)
2374*0Sstevel@tonic-gate {
2375*0Sstevel@tonic-gate 	spitf_async_flt cp;
2376*0Sstevel@tonic-gate 	spitf_async_flt *spf_cpflt = &cp;
2377*0Sstevel@tonic-gate 	struct async_flt *cpflt = (struct async_flt *)&cp;
2378*0Sstevel@tonic-gate 	int pix;
2379*0Sstevel@tonic-gate 
2380*0Sstevel@tonic-gate 	cpflt->flt_id = aflt->flt_id;
2381*0Sstevel@tonic-gate 	cpflt->flt_addr = aflt->flt_addr;
2382*0Sstevel@tonic-gate 
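	/*
	 * Cross-call each ready CPU below; get_cpu_status() runs on that
	 * CPU and fills in cpflt with its %afsr (and any matching e$
	 * line), which we then check for a copyback error.
	 */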
2383*0Sstevel@tonic-gate 	for (pix = 0; pix < NCPU; pix++) {
2384*0Sstevel@tonic-gate 		if (CPU_XCALL_READY(pix)) {
2385*0Sstevel@tonic-gate 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2386*0Sstevel@tonic-gate 			    (uint64_t)cpflt, 0);
2387*0Sstevel@tonic-gate 
2388*0Sstevel@tonic-gate 			if (cpflt->flt_stat & P_AFSR_CP) {
2389*0Sstevel@tonic-gate 				char *space;
2390*0Sstevel@tonic-gate 
2391*0Sstevel@tonic-gate 				/* See which space - this info may not exist */
2392*0Sstevel@tonic-gate 				if (aflt->flt_status & ECC_D_TRAP)
2393*0Sstevel@tonic-gate 					space = "Data ";
2394*0Sstevel@tonic-gate 				else if (aflt->flt_status & ECC_I_TRAP)
2395*0Sstevel@tonic-gate 					space = "Instruction ";
2396*0Sstevel@tonic-gate 				else
2397*0Sstevel@tonic-gate 					space = "";
2398*0Sstevel@tonic-gate 
2399*0Sstevel@tonic-gate 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2400*0Sstevel@tonic-gate 				    NULL, " AFAR was derived from UE report,"
2401*0Sstevel@tonic-gate 				    " CP event on CPU%d (caused %saccess "
2402*0Sstevel@tonic-gate 				    "error on %s%d)", pix, space,
2403*0Sstevel@tonic-gate 				    (aflt->flt_status & ECC_IOBUS) ?
2404*0Sstevel@tonic-gate 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2405*0Sstevel@tonic-gate 
2406*0Sstevel@tonic-gate 				if (spf_cpflt->flt_ec_lcnt > 0)
2407*0Sstevel@tonic-gate 					cpu_log_ecmem_info(spf_cpflt);
2408*0Sstevel@tonic-gate 				else
2409*0Sstevel@tonic-gate 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2410*0Sstevel@tonic-gate 					    CPU_ERRID_FIRST, NULL,
2411*0Sstevel@tonic-gate 					    " No cache dump available");
2412*0Sstevel@tonic-gate 			}
2413*0Sstevel@tonic-gate 		}
2414*0Sstevel@tonic-gate 	}
2415*0Sstevel@tonic-gate }
2416*0Sstevel@tonic-gate 
2417*0Sstevel@tonic-gate #ifdef DEBUG
2418*0Sstevel@tonic-gate int test_mp_cp = 0;
2419*0Sstevel@tonic-gate #endif
2420*0Sstevel@tonic-gate 
2421*0Sstevel@tonic-gate /*
2422*0Sstevel@tonic-gate  * Cross-call callback routine to tell a CPU to read its own %afsr to check
2423*0Sstevel@tonic-gate  * for copyback errors and capture relevant information.
2424*0Sstevel@tonic-gate  */
2425*0Sstevel@tonic-gate static uint_t
2426*0Sstevel@tonic-gate get_cpu_status(uint64_t arg)
2427*0Sstevel@tonic-gate {
2428*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)arg;
2429*0Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2430*0Sstevel@tonic-gate 	uint64_t afsr;
2431*0Sstevel@tonic-gate 	uint32_t ec_idx;
2432*0Sstevel@tonic-gate 	uint64_t sdbh, sdbl;
2433*0Sstevel@tonic-gate 	int i;
2434*0Sstevel@tonic-gate 	uint32_t ec_set_size;
2435*0Sstevel@tonic-gate 	uchar_t valid;
2436*0Sstevel@tonic-gate 	ec_data_t ec_data[8];
2437*0Sstevel@tonic-gate 	uint64_t ec_tag, flt_addr_tag, oafsr;
2438*0Sstevel@tonic-gate 	uint64_t *acc_afsr = NULL;
2439*0Sstevel@tonic-gate 
2440*0Sstevel@tonic-gate 	get_asyncflt(&afsr);
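	/*
	 * Fold in any AFSR bits accumulated by the e$ scrubber for this
	 * CPU, and clear them so they are only reported once.
	 */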
2441*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) != NULL) {
2442*0Sstevel@tonic-gate 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2443*0Sstevel@tonic-gate 		afsr |= *acc_afsr;
2444*0Sstevel@tonic-gate 		*acc_afsr = 0;
2445*0Sstevel@tonic-gate 	}
2446*0Sstevel@tonic-gate 
2447*0Sstevel@tonic-gate #ifdef DEBUG
2448*0Sstevel@tonic-gate 	if (test_mp_cp)
2449*0Sstevel@tonic-gate 		afsr |= P_AFSR_CP;
2450*0Sstevel@tonic-gate #endif
2451*0Sstevel@tonic-gate 	aflt->flt_stat = afsr;
2452*0Sstevel@tonic-gate 
2453*0Sstevel@tonic-gate 	if (afsr & P_AFSR_CP) {
2454*0Sstevel@tonic-gate 		/*
2455*0Sstevel@tonic-gate 		 * Capture the UDBs
2456*0Sstevel@tonic-gate 		 */
2457*0Sstevel@tonic-gate 		get_udb_errors(&sdbh, &sdbl);
2458*0Sstevel@tonic-gate 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2459*0Sstevel@tonic-gate 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2460*0Sstevel@tonic-gate 
2461*0Sstevel@tonic-gate 		/*
2462*0Sstevel@tonic-gate 		 * Clear CP bit before capturing ecache data
2463*0Sstevel@tonic-gate 		 * and AFSR info.
2464*0Sstevel@tonic-gate 		 */
2465*0Sstevel@tonic-gate 		set_asyncflt(P_AFSR_CP);
2466*0Sstevel@tonic-gate 
2467*0Sstevel@tonic-gate 		/*
2468*0Sstevel@tonic-gate 		 * See if we can capture the ecache line for the
2469*0Sstevel@tonic-gate 		 * fault PA.
2470*0Sstevel@tonic-gate 		 *
2471*0Sstevel@tonic-gate 		 * Return a valid matching ecache line, if any.
2472*0Sstevel@tonic-gate 		 * Otherwise, return the first matching ecache
2473*0Sstevel@tonic-gate 		 * line marked invalid.
2474*0Sstevel@tonic-gate 		 */
2475*0Sstevel@tonic-gate 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2476*0Sstevel@tonic-gate 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2477*0Sstevel@tonic-gate 		    ecache_associativity;
2478*0Sstevel@tonic-gate 		spf_flt->flt_ec_lcnt = 0;
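		/*
		 * The fault PA modulo the set size gives the byte index of
		 * the line within the first way; adding ec_set_size steps
		 * to the same index in each successive way.  As an
		 * illustrative example (sizes hypothetical): with a 1 MB e$
		 * and associativity 2, ec_set_size is 0x80000, so a fault
		 * at PA 0x12345678 probes the lines at e$ offsets 0x45640
		 * and 0xc5640 after the 64-byte alignment below.
		 */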
2479*0Sstevel@tonic-gate 
2480*0Sstevel@tonic-gate 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2481*0Sstevel@tonic-gate 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
2482*0Sstevel@tonic-gate 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
2483*0Sstevel@tonic-gate 				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2484*0Sstevel@tonic-gate 				    acc_afsr);
2485*0Sstevel@tonic-gate 
2486*0Sstevel@tonic-gate 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2487*0Sstevel@tonic-gate 				continue;
2488*0Sstevel@tonic-gate 
2489*0Sstevel@tonic-gate 			valid = cpu_ec_state_valid &
2490*0Sstevel@tonic-gate 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2491*0Sstevel@tonic-gate 			    cpu_ec_state_shift);
2492*0Sstevel@tonic-gate 
2493*0Sstevel@tonic-gate 			if (valid || spf_flt->flt_ec_lcnt == 0) {
2494*0Sstevel@tonic-gate 				spf_flt->flt_ec_tag = ec_tag;
2495*0Sstevel@tonic-gate 				bcopy(&ec_data, &spf_flt->flt_ec_data,
2496*0Sstevel@tonic-gate 				    sizeof (ec_data));
2497*0Sstevel@tonic-gate 				spf_flt->flt_ec_lcnt = 1;
2498*0Sstevel@tonic-gate 
2499*0Sstevel@tonic-gate 				if (valid)
2500*0Sstevel@tonic-gate 					break;
2501*0Sstevel@tonic-gate 			}
2502*0Sstevel@tonic-gate 		}
2503*0Sstevel@tonic-gate 	}
2504*0Sstevel@tonic-gate 	return (0);
2505*0Sstevel@tonic-gate }
2506*0Sstevel@tonic-gate 
2507*0Sstevel@tonic-gate /*
2508*0Sstevel@tonic-gate  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2509*0Sstevel@tonic-gate  * from panic_idle() as part of the other CPUs stopping themselves when a
2510*0Sstevel@tonic-gate  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2511*0Sstevel@tonic-gate  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2512*0Sstevel@tonic-gate  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2513*0Sstevel@tonic-gate  * CP error information.
2514*0Sstevel@tonic-gate  */
2515*0Sstevel@tonic-gate void
2516*0Sstevel@tonic-gate cpu_async_panic_callb(void)
2517*0Sstevel@tonic-gate {
2518*0Sstevel@tonic-gate 	spitf_async_flt cp;
2519*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)&cp;
2520*0Sstevel@tonic-gate 	uint64_t *scrub_afsr;
2521*0Sstevel@tonic-gate 
2522*0Sstevel@tonic-gate 	if (panic_aflt.flt_id != 0) {
2523*0Sstevel@tonic-gate 		aflt->flt_addr = panic_aflt.flt_addr;
2524*0Sstevel@tonic-gate 		(void) get_cpu_status((uint64_t)aflt);
2525*0Sstevel@tonic-gate 
2526*0Sstevel@tonic-gate 		if (CPU_PRIVATE(CPU) != NULL) {
2527*0Sstevel@tonic-gate 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2528*0Sstevel@tonic-gate 			if (*scrub_afsr & P_AFSR_CP) {
2529*0Sstevel@tonic-gate 				aflt->flt_stat |= *scrub_afsr;
2530*0Sstevel@tonic-gate 				*scrub_afsr = 0;
2531*0Sstevel@tonic-gate 			}
2532*0Sstevel@tonic-gate 		}
2533*0Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_CP) {
2534*0Sstevel@tonic-gate 			aflt->flt_id = panic_aflt.flt_id;
2535*0Sstevel@tonic-gate 			aflt->flt_panic = 1;
2536*0Sstevel@tonic-gate 			aflt->flt_inst = CPU->cpu_id;
2537*0Sstevel@tonic-gate 			aflt->flt_class = CPU_FAULT;
2538*0Sstevel@tonic-gate 			cp.flt_type = CPU_PANIC_CP_ERR;
2539*0Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2540*0Sstevel@tonic-gate 			    (void *)&cp, sizeof (cp), ue_queue,
2541*0Sstevel@tonic-gate 			    aflt->flt_panic);
2542*0Sstevel@tonic-gate 		}
2543*0Sstevel@tonic-gate 	}
2544*0Sstevel@tonic-gate }
2545*0Sstevel@tonic-gate 
2546*0Sstevel@tonic-gate /*
2547*0Sstevel@tonic-gate  * Turn off all cpu error detection, normally only used for panics.
2548*0Sstevel@tonic-gate  */
2549*0Sstevel@tonic-gate void
2550*0Sstevel@tonic-gate cpu_disable_errors(void)
2551*0Sstevel@tonic-gate {
2552*0Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2553*0Sstevel@tonic-gate }
2554*0Sstevel@tonic-gate 
2555*0Sstevel@tonic-gate /*
2556*0Sstevel@tonic-gate  * Enable errors.
2557*0Sstevel@tonic-gate  */
2558*0Sstevel@tonic-gate void
2559*0Sstevel@tonic-gate cpu_enable_errors(void)
2560*0Sstevel@tonic-gate {
2561*0Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2562*0Sstevel@tonic-gate }
2563*0Sstevel@tonic-gate 
2564*0Sstevel@tonic-gate static void
2565*0Sstevel@tonic-gate cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2566*0Sstevel@tonic-gate {
2567*0Sstevel@tonic-gate 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2568*0Sstevel@tonic-gate 	int i, loop = 1;
2569*0Sstevel@tonic-gate 	ushort_t ecc_0;
2570*0Sstevel@tonic-gate 	uint64_t paddr;
2571*0Sstevel@tonic-gate 	uint64_t data;
2572*0Sstevel@tonic-gate 
2573*0Sstevel@tonic-gate 	if (verbose)
2574*0Sstevel@tonic-gate 		loop = 8;
2575*0Sstevel@tonic-gate 	for (i = 0; i < loop; i++) {
2576*0Sstevel@tonic-gate 		paddr = aligned_addr + (i * 8);
2577*0Sstevel@tonic-gate 		data = lddphys(paddr);
2578*0Sstevel@tonic-gate 		if (verbose) {
2579*0Sstevel@tonic-gate 			if (ce_err) {
2580*0Sstevel@tonic-gate 				ecc_0 = ecc_gen((uint32_t)(data>>32),
2581*0Sstevel@tonic-gate 				    (uint32_t)data);
2582*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2583*0Sstevel@tonic-gate 				    NULL, "    Paddr 0x%" PRIx64 ", "
2584*0Sstevel@tonic-gate 				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
2585*0Sstevel@tonic-gate 				    (uint32_t)(data>>32), (uint32_t)data, ecc_0);
2586*0Sstevel@tonic-gate 			} else {
2587*0Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2588*0Sstevel@tonic-gate 				    NULL, "    Paddr 0x%" PRIx64 ", "
2589*0Sstevel@tonic-gate 				    "Data 0x%08x.%08x", paddr,
2590*0Sstevel@tonic-gate 				    (uint32_t)(data>>32), (uint32_t)data);
2591*0Sstevel@tonic-gate 			}
2592*0Sstevel@tonic-gate 		}
2593*0Sstevel@tonic-gate 	}
2594*0Sstevel@tonic-gate }
2595*0Sstevel@tonic-gate 
2596*0Sstevel@tonic-gate static struct {		/* sec-ded-s4ed ecc code */
2597*0Sstevel@tonic-gate 	uint_t hi, lo;
2598*0Sstevel@tonic-gate } ecc_code[8] = {
2599*0Sstevel@tonic-gate 	{ 0xee55de23U, 0x16161161U },
2600*0Sstevel@tonic-gate 	{ 0x55eede93U, 0x61612212U },
2601*0Sstevel@tonic-gate 	{ 0xbb557b8cU, 0x49494494U },
2602*0Sstevel@tonic-gate 	{ 0x55bb7b6cU, 0x94948848U },
2603*0Sstevel@tonic-gate 	{ 0x16161161U, 0xee55de23U },
2604*0Sstevel@tonic-gate 	{ 0x61612212U, 0x55eede93U },
2605*0Sstevel@tonic-gate 	{ 0x49494494U, 0xbb557b8cU },
2606*0Sstevel@tonic-gate 	{ 0x94948848U, 0x55bb7b6cU }
2607*0Sstevel@tonic-gate };
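/*
 * Each row above is a parity-check mask: check bit i of the ECC is the
 * XOR of the 64 data bits ANDed with row i (see ecc_gen() below).  Note
 * that rows 4-7 are rows 0-3 with the hi and lo words swapped, reflecting
 * the symmetry of the code across the two 32-bit halves.
 */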
2608*0Sstevel@tonic-gate 
2609*0Sstevel@tonic-gate static ushort_t
2610*0Sstevel@tonic-gate ecc_gen(uint_t high_bytes, uint_t low_bytes)
2611*0Sstevel@tonic-gate {
2612*0Sstevel@tonic-gate 	int i, j;
2613*0Sstevel@tonic-gate 	uchar_t checker, bit_mask;
2614*0Sstevel@tonic-gate 	struct {
2615*0Sstevel@tonic-gate 		uint_t hi, lo;
2616*0Sstevel@tonic-gate 	} hex_data, masked_data[8];
2617*0Sstevel@tonic-gate 
2618*0Sstevel@tonic-gate 	hex_data.hi = high_bytes;
2619*0Sstevel@tonic-gate 	hex_data.lo = low_bytes;
2620*0Sstevel@tonic-gate 
2621*0Sstevel@tonic-gate 	/* mask out bits according to sec-ded-s4ed ecc code */
2622*0Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
2623*0Sstevel@tonic-gate 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2624*0Sstevel@tonic-gate 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2625*0Sstevel@tonic-gate 	}
2626*0Sstevel@tonic-gate 
2627*0Sstevel@tonic-gate 	/*
2628*0Sstevel@tonic-gate 	 * xor all bits in masked_data[i] to get bit_i of checker,
2629*0Sstevel@tonic-gate 	 * where i = 0 to 7
2630*0Sstevel@tonic-gate 	 */
2631*0Sstevel@tonic-gate 	checker = 0;
2632*0Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
2633*0Sstevel@tonic-gate 		bit_mask = 1 << i;
2634*0Sstevel@tonic-gate 		for (j = 0; j < 32; j++) {
2635*0Sstevel@tonic-gate 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2636*0Sstevel@tonic-gate 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2637*0Sstevel@tonic-gate 			masked_data[i].hi >>= 1;
2638*0Sstevel@tonic-gate 			masked_data[i].lo >>= 1;
2639*0Sstevel@tonic-gate 		}
2640*0Sstevel@tonic-gate 	}
2641*0Sstevel@tonic-gate 	return (checker);
2642*0Sstevel@tonic-gate }
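/*
 * As a worked example, ecc_gen(0, 1), with only data bit 0 set, returns
 * 0x31: bit 0 of the low word appears only in mask rows 0, 4 and 5, so
 * only check bits 0, 4 and 5 are set.  The sketch below (not compiled)
 * shows the pairing used by cpu_read_paddr() above: read a doubleword
 * from physical memory and recompute the check bits it should carry.
 */
#if 0
	uint64_t data = lddphys(paddr);		/* fetch the doubleword */
	ushort_t ecc = ecc_gen((uint32_t)(data >> 32), (uint32_t)data);
#endif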
2643*0Sstevel@tonic-gate 
2644*0Sstevel@tonic-gate /*
2645*0Sstevel@tonic-gate  * Flush the entire ecache using displacement flush by reading through a
2646*0Sstevel@tonic-gate  * physical address range as large as the ecache.
2647*0Sstevel@tonic-gate  */
2648*0Sstevel@tonic-gate void
2649*0Sstevel@tonic-gate cpu_flush_ecache(void)
2650*0Sstevel@tonic-gate {
2651*0Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2652*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
2653*0Sstevel@tonic-gate }
2654*0Sstevel@tonic-gate 
2655*0Sstevel@tonic-gate /*
2656*0Sstevel@tonic-gate  * Read and display the data in the cache line where the
2657*0Sstevel@tonic-gate  * original CE error occurred.
2658*0Sstevel@tonic-gate  * This routine is mainly used for debugging new hardware.
2659*0Sstevel@tonic-gate  */
2660*0Sstevel@tonic-gate void
2661*0Sstevel@tonic-gate read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2662*0Sstevel@tonic-gate {
2663*0Sstevel@tonic-gate 	kpreempt_disable();
2664*0Sstevel@tonic-gate 	/* disable ECC error traps */
2665*0Sstevel@tonic-gate 	set_error_enable(EER_ECC_DISABLE);
2666*0Sstevel@tonic-gate 
2667*0Sstevel@tonic-gate 	/*
2668*0Sstevel@tonic-gate 	 * flush the ecache
2669*0Sstevel@tonic-gate 	 * read the data
2670*0Sstevel@tonic-gate 	 * check to see if an ECC error occurred
2671*0Sstevel@tonic-gate 	 */
2672*0Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2673*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
2674*0Sstevel@tonic-gate 	set_lsu(get_lsu() | cache_boot_state);
2675*0Sstevel@tonic-gate 	cpu_read_paddr(ecc, verbose, ce_err);
2676*0Sstevel@tonic-gate 	(void) check_ecc(ecc);
2677*0Sstevel@tonic-gate 
2678*0Sstevel@tonic-gate 	/* enable ECC error traps */
2679*0Sstevel@tonic-gate 	set_error_enable(EER_ENABLE);
2680*0Sstevel@tonic-gate 	kpreempt_enable();
2681*0Sstevel@tonic-gate }
2682*0Sstevel@tonic-gate 
2683*0Sstevel@tonic-gate /*
2684*0Sstevel@tonic-gate  * Check the AFSR bits for UE/CE persistence.
2685*0Sstevel@tonic-gate  * If UE or CE errors are detected, the routine
2686*0Sstevel@tonic-gate  * clears all the AFSR sticky bits (except CP for
2687*0Sstevel@tonic-gate  * spitfire/blackbird) and the UDBs.
2688*0Sstevel@tonic-gate  * If ce_debug or ue_debug is set, log any UE/CE errors detected.
2689*0Sstevel@tonic-gate  */
2690*0Sstevel@tonic-gate static int
2691*0Sstevel@tonic-gate check_ecc(struct async_flt *ecc)
2692*0Sstevel@tonic-gate {
2693*0Sstevel@tonic-gate 	uint64_t t_afsr;
2694*0Sstevel@tonic-gate 	uint64_t t_afar;
2695*0Sstevel@tonic-gate 	uint64_t udbh;
2696*0Sstevel@tonic-gate 	uint64_t udbl;
2697*0Sstevel@tonic-gate 	ushort_t udb;
2698*0Sstevel@tonic-gate 	int persistent = 0;
2699*0Sstevel@tonic-gate 
2700*0Sstevel@tonic-gate 	/*
2701*0Sstevel@tonic-gate 	 * Capture the AFSR, AFAR and UDB info
2702*0Sstevel@tonic-gate 	 */
2703*0Sstevel@tonic-gate 	get_asyncflt(&t_afsr);
2704*0Sstevel@tonic-gate 	get_asyncaddr(&t_afar);
2705*0Sstevel@tonic-gate 	t_afar &= SABRE_AFAR_PA;
2706*0Sstevel@tonic-gate 	get_udb_errors(&udbh, &udbl);
2707*0Sstevel@tonic-gate 
2708*0Sstevel@tonic-gate 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2709*0Sstevel@tonic-gate 		/*
2710*0Sstevel@tonic-gate 		 * Clear the errors
2711*0Sstevel@tonic-gate 		 */
2712*0Sstevel@tonic-gate 		clr_datapath();
2713*0Sstevel@tonic-gate 
2714*0Sstevel@tonic-gate 		if (isus2i || isus2e)
2715*0Sstevel@tonic-gate 			set_asyncflt(t_afsr);
2716*0Sstevel@tonic-gate 		else
2717*0Sstevel@tonic-gate 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2718*0Sstevel@tonic-gate 
2719*0Sstevel@tonic-gate 		/*
2720*0Sstevel@tonic-gate 		 * determine whether to check UDBH or UDBL for persistence
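		 * (the low UDB covers the odd 8-byte doubleword, which is
		 * why bit 3 of the AFAR is set below when UDBL reported
		 * the error)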
2721*0Sstevel@tonic-gate 		 */
2722*0Sstevel@tonic-gate 		if (ecc->flt_synd & UDBL_REG) {
2723*0Sstevel@tonic-gate 			udb = (ushort_t)udbl;
2724*0Sstevel@tonic-gate 			t_afar |= 0x8;
2725*0Sstevel@tonic-gate 		} else {
2726*0Sstevel@tonic-gate 			udb = (ushort_t)udbh;
2727*0Sstevel@tonic-gate 		}
2728*0Sstevel@tonic-gate 
2729*0Sstevel@tonic-gate 		if (ce_debug || ue_debug) {
2730*0Sstevel@tonic-gate 			spitf_async_flt spf_flt; /* for logging */
2731*0Sstevel@tonic-gate 			struct async_flt *aflt =
2732*0Sstevel@tonic-gate 				(struct async_flt *)&spf_flt;
2733*0Sstevel@tonic-gate 
2734*0Sstevel@tonic-gate 			/* Package the info nicely in the spf_flt struct */
2735*0Sstevel@tonic-gate 			bzero(&spf_flt, sizeof (spitf_async_flt));
2736*0Sstevel@tonic-gate 			aflt->flt_stat = t_afsr;
2737*0Sstevel@tonic-gate 			aflt->flt_addr = t_afar;
2738*0Sstevel@tonic-gate 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2739*0Sstevel@tonic-gate 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2740*0Sstevel@tonic-gate 
2741*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2742*0Sstevel@tonic-gate 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2743*0Sstevel@tonic-gate 			    " check_ecc: Dumping captured error states ...");
2744*0Sstevel@tonic-gate 		}
2745*0Sstevel@tonic-gate 
2746*0Sstevel@tonic-gate 		/*
2747*0Sstevel@tonic-gate 		 * if the fault addresses don't match, not persistent
2748*0Sstevel@tonic-gate 		 */
2749*0Sstevel@tonic-gate 		if (t_afar != ecc->flt_addr) {
2750*0Sstevel@tonic-gate 			return (persistent);
2751*0Sstevel@tonic-gate 		}
2752*0Sstevel@tonic-gate 
2753*0Sstevel@tonic-gate 		/*
2754*0Sstevel@tonic-gate 		 * check for UE persistence
2755*0Sstevel@tonic-gate 		 * since all DIMMs in the bank are identified for a UE,
2756*0Sstevel@tonic-gate 		 * there's no reason to check the syndrome
2757*0Sstevel@tonic-gate 		 */
2758*0Sstevel@tonic-gate 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2759*0Sstevel@tonic-gate 			persistent = 1;
2760*0Sstevel@tonic-gate 		}
2761*0Sstevel@tonic-gate 
2762*0Sstevel@tonic-gate 		/*
2763*0Sstevel@tonic-gate 		 * check for CE persistence
2764*0Sstevel@tonic-gate 		 */
2765*0Sstevel@tonic-gate 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2766*0Sstevel@tonic-gate 			if ((udb & P_DER_E_SYND) ==
2767*0Sstevel@tonic-gate 			    (ecc->flt_synd & P_DER_E_SYND)) {
2768*0Sstevel@tonic-gate 				persistent = 1;
2769*0Sstevel@tonic-gate 			}
2770*0Sstevel@tonic-gate 		}
2771*0Sstevel@tonic-gate 	}
2772*0Sstevel@tonic-gate 	return (persistent);
2773*0Sstevel@tonic-gate }
2774*0Sstevel@tonic-gate 
2775*0Sstevel@tonic-gate #ifdef HUMMINGBIRD
2776*0Sstevel@tonic-gate #define	HB_FULL_DIV		1
2777*0Sstevel@tonic-gate #define	HB_HALF_DIV		2
2778*0Sstevel@tonic-gate #define	HB_LOWEST_DIV		8
2779*0Sstevel@tonic-gate #define	HB_ECLK_INVALID		0xdeadbad
2780*0Sstevel@tonic-gate static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
2781*0Sstevel@tonic-gate 	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
2782*0Sstevel@tonic-gate 	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
2783*0Sstevel@tonic-gate 	HB_ECLK_8 };
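/*
 * Divisors 3, 5 and 7 have no corresponding clock setting, hence the
 * HB_ECLK_INVALID entries above; cpu_change_speed() rejects them.
 */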
2784*0Sstevel@tonic-gate 
2785*0Sstevel@tonic-gate #define	HB_SLOW_DOWN		0
2786*0Sstevel@tonic-gate #define	HB_SPEED_UP		1
2787*0Sstevel@tonic-gate 
2788*0Sstevel@tonic-gate #define	SET_ESTAR_MODE(mode)					\
2789*0Sstevel@tonic-gate 	stdphysio(HB_ESTAR_MODE, (mode));			\
2790*0Sstevel@tonic-gate 	/*							\
2791*0Sstevel@tonic-gate 	 * PLL logic requires minimum of 16 clock		\
2792*0Sstevel@tonic-gate 	 * cycles to lock to the new clock speed.		\
2793*0Sstevel@tonic-gate 	 * Wait 1 usec to satisfy this requirement.		\
2794*0Sstevel@tonic-gate 	 */							\
2795*0Sstevel@tonic-gate 	drv_usecwait(1);
2796*0Sstevel@tonic-gate 
2797*0Sstevel@tonic-gate #define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
2798*0Sstevel@tonic-gate {								\
2799*0Sstevel@tonic-gate 	volatile uint64_t data;					\
2800*0Sstevel@tonic-gate 	uint64_t count, new_count;				\
2801*0Sstevel@tonic-gate 	clock_t delay;						\
2802*0Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
2803*0Sstevel@tonic-gate 	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
2804*0Sstevel@tonic-gate 	    HB_REFRESH_COUNT_SHIFT;				\
2805*0Sstevel@tonic-gate 	new_count = (HB_REFRESH_INTERVAL *			\
2806*0Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].clock_freq) /			\
2807*0Sstevel@tonic-gate 	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
2808*0Sstevel@tonic-gate 	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
2809*0Sstevel@tonic-gate 	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
2810*0Sstevel@tonic-gate 	stdphysio(HB_MEM_CNTRL0, data);				\
2811*0Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);        		\
2812*0Sstevel@tonic-gate 	/*							\
2813*0Sstevel@tonic-gate 	 * If we are slowing down the cpu and Memory		\
2814*0Sstevel@tonic-gate 	 * Self Refresh is not enabled, we must wait for the	\
2815*0Sstevel@tonic-gate 	 * old refresh count to count down and for the new	\
2816*0Sstevel@tonic-gate 	 * refresh count to take effect (let the new value	\
2817*0Sstevel@tonic-gate 	 * count down once).					\
2818*0Sstevel@tonic-gate 	 */							\
2819*0Sstevel@tonic-gate 	if ((direction) == HB_SLOW_DOWN &&			\
2820*0Sstevel@tonic-gate 	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
2821*0Sstevel@tonic-gate 		/*						\
2822*0Sstevel@tonic-gate 		 * Each count takes 64 cpu clock cycles		\
2823*0Sstevel@tonic-gate 		 * to decrement.  Wait for current refresh	\
2824*0Sstevel@tonic-gate 		 * count plus new refresh count at current	\
2825*0Sstevel@tonic-gate 		 * cpu speed to count down to zero.  Round	\
2826*0Sstevel@tonic-gate 		 * up the delay time.				\
2827*0Sstevel@tonic-gate 		 */						\
2828*0Sstevel@tonic-gate 		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
2829*0Sstevel@tonic-gate 		    (count + new_count) * MICROSEC * (cur_div)) /\
2830*0Sstevel@tonic-gate 		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
2831*0Sstevel@tonic-gate 		drv_usecwait(delay);				\
2832*0Sstevel@tonic-gate 	}							\
2833*0Sstevel@tonic-gate }
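/*
 * To put rough numbers on the delay above (all values illustrative): with
 * HB_REFRESH_CLOCKS_PER_COUNT at 64 clock cycles, a 440 MHz clock_freq,
 * cur_div of 1 and (count + new_count) of 8000, the wait works out to
 * (64 * 8000 * MICROSEC * 1) / 440000000 + 1 = 1164 usec.
 */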
2834*0Sstevel@tonic-gate 
2835*0Sstevel@tonic-gate #define	SET_SELF_REFRESH(bit)					\
2836*0Sstevel@tonic-gate {								\
2837*0Sstevel@tonic-gate 	volatile uint64_t data;					\
2838*0Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
2839*0Sstevel@tonic-gate 	data = (data & ~HB_SELF_REFRESH_MASK) |			\
2840*0Sstevel@tonic-gate 	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
2841*0Sstevel@tonic-gate 	stdphysio(HB_MEM_CNTRL0, data);				\
2842*0Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
2843*0Sstevel@tonic-gate }
2844*0Sstevel@tonic-gate #endif	/* HUMMINGBIRD */
2845*0Sstevel@tonic-gate 
2846*0Sstevel@tonic-gate /* ARGSUSED */
2847*0Sstevel@tonic-gate void
2848*0Sstevel@tonic-gate cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
2849*0Sstevel@tonic-gate {
2850*0Sstevel@tonic-gate #ifdef HUMMINGBIRD
2851*0Sstevel@tonic-gate 	uint64_t cur_mask, cur_divisor = 0;
2852*0Sstevel@tonic-gate 	volatile uint64_t reg;
2853*0Sstevel@tonic-gate 	int index;
2854*0Sstevel@tonic-gate 
2855*0Sstevel@tonic-gate 	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
2856*0Sstevel@tonic-gate 	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
2857*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
2858*0Sstevel@tonic-gate 		    new_divisor);
2859*0Sstevel@tonic-gate 		return;
2860*0Sstevel@tonic-gate 	}
2861*0Sstevel@tonic-gate 
2862*0Sstevel@tonic-gate 	reg = lddphysio(HB_ESTAR_MODE);
2863*0Sstevel@tonic-gate 	cur_mask = reg & HB_ECLK_MASK;
2864*0Sstevel@tonic-gate 	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
2865*0Sstevel@tonic-gate 		if (hb_eclk[index] == cur_mask) {
2866*0Sstevel@tonic-gate 			cur_divisor = index;
2867*0Sstevel@tonic-gate 			break;
2868*0Sstevel@tonic-gate 		}
2869*0Sstevel@tonic-gate 	}
2870*0Sstevel@tonic-gate 
2871*0Sstevel@tonic-gate 	if (cur_divisor == 0)
2872*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
2873*0Sstevel@tonic-gate 		    "can't be determined!");
2874*0Sstevel@tonic-gate 
2875*0Sstevel@tonic-gate 	/*
2876*0Sstevel@tonic-gate 	 * If we are already at the requested divisor speed, just
2877*0Sstevel@tonic-gate 	 * return.
2878*0Sstevel@tonic-gate 	 */
2879*0Sstevel@tonic-gate 	if (cur_divisor == new_divisor)
2880*0Sstevel@tonic-gate 		return;
2881*0Sstevel@tonic-gate 
2882*0Sstevel@tonic-gate 	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
2883*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2884*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2885*0Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2886*0Sstevel@tonic-gate 
2887*0Sstevel@tonic-gate 	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2888*0Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2889*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2890*0Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2891*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2892*0Sstevel@tonic-gate 
2893*0Sstevel@tonic-gate 	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
2894*0Sstevel@tonic-gate 		/*
2895*0Sstevel@tonic-gate 		 * Transition to 1/2 speed first, then to
2896*0Sstevel@tonic-gate 		 * lower speed.
2897*0Sstevel@tonic-gate 		 */
2898*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
2899*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2900*0Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2901*0Sstevel@tonic-gate 
2902*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
2903*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2904*0Sstevel@tonic-gate 
2905*0Sstevel@tonic-gate 	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2906*0Sstevel@tonic-gate 		/*
2907*0Sstevel@tonic-gate 		 * Transition to 1/2 speed first, then to
2908*0Sstevel@tonic-gate 		 * full speed.
2909*0Sstevel@tonic-gate 		 */
2910*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2911*0Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2912*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
2913*0Sstevel@tonic-gate 
2914*0Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2915*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2916*0Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2917*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
2918*0Sstevel@tonic-gate 
2919*0Sstevel@tonic-gate 	} else if (cur_divisor < new_divisor) {
2920*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2921*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2922*0Sstevel@tonic-gate 
2923*0Sstevel@tonic-gate 	} else if (cur_divisor > new_divisor) {
2924*0Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2925*0Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2926*0Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2927*0Sstevel@tonic-gate 	}
2928*0Sstevel@tonic-gate 	CPU->cpu_m.divisor = (uchar_t)new_divisor;
2929*0Sstevel@tonic-gate #endif
2930*0Sstevel@tonic-gate }
2931*0Sstevel@tonic-gate 
2932*0Sstevel@tonic-gate /*
2933*0Sstevel@tonic-gate  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2934*0Sstevel@tonic-gate  * we clear all the sticky bits. If a non-null pointer to an async fault
2935*0Sstevel@tonic-gate  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2936*0Sstevel@tonic-gate  * info will be returned in the structure.  If a non-null pointer to a
2937*0Sstevel@tonic-gate  * uint64_t is passed in, this will be updated if the CP bit is set in the
2938*0Sstevel@tonic-gate  * AFSR.  The afsr will be returned.
2939*0Sstevel@tonic-gate  */
2940*0Sstevel@tonic-gate static uint64_t
2941*0Sstevel@tonic-gate clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2942*0Sstevel@tonic-gate {
2943*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2944*0Sstevel@tonic-gate 	uint64_t afsr;
2945*0Sstevel@tonic-gate 	uint64_t udbh, udbl;
2946*0Sstevel@tonic-gate 
2947*0Sstevel@tonic-gate 	get_asyncflt(&afsr);
2948*0Sstevel@tonic-gate 
2949*0Sstevel@tonic-gate 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2950*0Sstevel@tonic-gate 		*acc_afsr |= afsr;
2951*0Sstevel@tonic-gate 
2952*0Sstevel@tonic-gate 	if (spf_flt != NULL) {
2953*0Sstevel@tonic-gate 		aflt->flt_stat = afsr;
2954*0Sstevel@tonic-gate 		get_asyncaddr(&aflt->flt_addr);
2955*0Sstevel@tonic-gate 		aflt->flt_addr &= SABRE_AFAR_PA;
2956*0Sstevel@tonic-gate 
2957*0Sstevel@tonic-gate 		get_udb_errors(&udbh, &udbl);
2958*0Sstevel@tonic-gate 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2959*0Sstevel@tonic-gate 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2960*0Sstevel@tonic-gate 	}
2961*0Sstevel@tonic-gate 
2962*0Sstevel@tonic-gate 	set_asyncflt(afsr);		/* clear afsr */
2963*0Sstevel@tonic-gate 	clr_datapath();			/* clear udbs */
2964*0Sstevel@tonic-gate 	return (afsr);
2965*0Sstevel@tonic-gate }
2966*0Sstevel@tonic-gate 
2967*0Sstevel@tonic-gate /*
2968*0Sstevel@tonic-gate  * Scan the ecache to look for bad lines.  If one is found, the AFSR, AFAR,
2969*0Sstevel@tonic-gate  * E$ data and tag of the first bad line will be returned.  We also return
2970*0Sstevel@tonic-gate  * the old AFSR (before clearing the sticky bits).  The linecnt data will
2971*0Sstevel@tonic-gate  * be updated to indicate the number of bad lines detected.
2972*0Sstevel@tonic-gate  */
2973*0Sstevel@tonic-gate static void
2974*0Sstevel@tonic-gate scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2975*0Sstevel@tonic-gate 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2976*0Sstevel@tonic-gate {
2977*0Sstevel@tonic-gate 	ec_data_t t_ecdata[8];
2978*0Sstevel@tonic-gate 	uint64_t t_etag, oafsr;
2979*0Sstevel@tonic-gate 	uint64_t pa = AFLT_INV_ADDR;
2980*0Sstevel@tonic-gate 	uint32_t i, j, ecache_sz;
2981*0Sstevel@tonic-gate 	uint64_t acc_afsr = 0;
2982*0Sstevel@tonic-gate 	uint64_t *cpu_afsr = NULL;
2983*0Sstevel@tonic-gate 
2984*0Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) != NULL)
2985*0Sstevel@tonic-gate 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2986*0Sstevel@tonic-gate 
2987*0Sstevel@tonic-gate 	*linecnt = 0;
2988*0Sstevel@tonic-gate 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2989*0Sstevel@tonic-gate 
2990*0Sstevel@tonic-gate 	for (i = 0; i < ecache_sz; i += 64) {
2991*0Sstevel@tonic-gate 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2992*0Sstevel@tonic-gate 		    cpu_afsr);
2993*0Sstevel@tonic-gate 		acc_afsr |= oafsr;
2994*0Sstevel@tonic-gate 
2995*0Sstevel@tonic-gate 		/*
2996*0Sstevel@tonic-gate 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2997*0Sstevel@tonic-gate 		 * looking for the first occurrence of an EDP error.  The AFSR
2998*0Sstevel@tonic-gate 		 * info is captured for each 8-byte chunk.  Note that for
2999*0Sstevel@tonic-gate 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
3000*0Sstevel@tonic-gate 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
3001*0Sstevel@tonic-gate 		 * for the high and low 8-byte words within the 16-byte chunk).
3002*0Sstevel@tonic-gate 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
3003*0Sstevel@tonic-gate 		 * granularity and only PSYND bits [7:0] are used.
3004*0Sstevel@tonic-gate 		 */
3005*0Sstevel@tonic-gate 		for (j = 0; j < 8; j++) {
3006*0Sstevel@tonic-gate 			ec_data_t *ecdptr = &t_ecdata[j];
3007*0Sstevel@tonic-gate 
3008*0Sstevel@tonic-gate 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
3009*0Sstevel@tonic-gate 				uint64_t errpa;
3010*0Sstevel@tonic-gate 				ushort_t psynd;
3011*0Sstevel@tonic-gate 				uint32_t ec_set_size = ecache_sz /
3012*0Sstevel@tonic-gate 				    ecache_associativity;
3013*0Sstevel@tonic-gate 
3014*0Sstevel@tonic-gate 				/*
3015*0Sstevel@tonic-gate 				 * For Spitfire/Blackbird, we need to look at
3016*0Sstevel@tonic-gate 				 * the PSYND to make sure that this 8-byte chunk
3017*0Sstevel@tonic-gate 				 * is the right one.  PSYND bits [15:8] belong
3018*0Sstevel@tonic-gate 				 * to the upper 8-byte (even) chunk.  Bits
3019*0Sstevel@tonic-gate 				 * [7:0] belong to the lower 8-byte chunk (odd).
3020*0Sstevel@tonic-gate 				 */
3021*0Sstevel@tonic-gate 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3022*0Sstevel@tonic-gate 				if (!isus2i && !isus2e) {
3023*0Sstevel@tonic-gate 					if (j & 0x1)
3024*0Sstevel@tonic-gate 						psynd = psynd & 0xFF;
3025*0Sstevel@tonic-gate 					else
3026*0Sstevel@tonic-gate 						psynd = psynd >> 8;
3027*0Sstevel@tonic-gate 
3028*0Sstevel@tonic-gate 					if (!psynd)
3029*0Sstevel@tonic-gate 						continue; /* wrong chunk */
3030*0Sstevel@tonic-gate 				}
3031*0Sstevel@tonic-gate 
3032*0Sstevel@tonic-gate 				/* Construct the PA */
3033*0Sstevel@tonic-gate 				errpa = ((t_etag & cpu_ec_tag_mask) <<
3034*0Sstevel@tonic-gate 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3035*0Sstevel@tonic-gate 				    ec_set_size);
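				/*
				 * For instance (values hypothetical): with
				 * ec_set_size 0x80000, line offset i 0x45640
				 * and chunk j 3, the low bits come out to
				 * (0x45640 | 0x18) % 0x80000 = 0x45658, and
				 * the tag supplies the PA bits above that.
				 */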
3036*0Sstevel@tonic-gate 
3037*0Sstevel@tonic-gate 				/* clean up the cache line */
3038*0Sstevel@tonic-gate 				flushecacheline(P2ALIGN(errpa, 64),
3039*0Sstevel@tonic-gate 					cpunodes[CPU->cpu_id].ecache_size);
3040*0Sstevel@tonic-gate 
3041*0Sstevel@tonic-gate 				oafsr = clear_errors(NULL, cpu_afsr);
3042*0Sstevel@tonic-gate 				acc_afsr |= oafsr;
3043*0Sstevel@tonic-gate 
3044*0Sstevel@tonic-gate 				(*linecnt)++;
3045*0Sstevel@tonic-gate 
3046*0Sstevel@tonic-gate 				/*
3047*0Sstevel@tonic-gate 				 * Capture the PA for the first bad line found.
3048*0Sstevel@tonic-gate 				 * Return the ecache dump and tag info.
3049*0Sstevel@tonic-gate 				 */
3050*0Sstevel@tonic-gate 				if (pa == AFLT_INV_ADDR) {
3051*0Sstevel@tonic-gate 					int k;
3052*0Sstevel@tonic-gate 
3053*0Sstevel@tonic-gate 					pa = errpa;
3054*0Sstevel@tonic-gate 					for (k = 0; k < 8; k++)
3055*0Sstevel@tonic-gate 						ecache_data[k] = t_ecdata[k];
3056*0Sstevel@tonic-gate 					*ecache_tag = t_etag;
3057*0Sstevel@tonic-gate 				}
3058*0Sstevel@tonic-gate 				break;
3059*0Sstevel@tonic-gate 			}
3060*0Sstevel@tonic-gate 		}
3061*0Sstevel@tonic-gate 	}
3062*0Sstevel@tonic-gate 	*t_afar = pa;
3063*0Sstevel@tonic-gate 	*t_afsr = acc_afsr;
3064*0Sstevel@tonic-gate }
3065*0Sstevel@tonic-gate 
3066*0Sstevel@tonic-gate static void
3067*0Sstevel@tonic-gate cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3068*0Sstevel@tonic-gate {
3069*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3070*0Sstevel@tonic-gate 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3071*0Sstevel@tonic-gate 	char linestr[30];
3072*0Sstevel@tonic-gate 	char *state_str;
3073*0Sstevel@tonic-gate 	int i;
3074*0Sstevel@tonic-gate 
3075*0Sstevel@tonic-gate 	/*
3076*0Sstevel@tonic-gate 	 * Check the ecache tag to make sure it
3077*0Sstevel@tonic-gate 	 * is valid. If invalid, a memory dump was
3078*0Sstevel@tonic-gate 	 * captured instead of an ecache dump.
3079*0Sstevel@tonic-gate 	 */
3080*0Sstevel@tonic-gate 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3081*0Sstevel@tonic-gate 		uchar_t eparity = (uchar_t)
3082*0Sstevel@tonic-gate 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3083*0Sstevel@tonic-gate 
3084*0Sstevel@tonic-gate 		uchar_t estate = (uchar_t)
3085*0Sstevel@tonic-gate 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3086*0Sstevel@tonic-gate 
3087*0Sstevel@tonic-gate 		if (estate == cpu_ec_state_shr)
3088*0Sstevel@tonic-gate 			state_str = "Shared";
3089*0Sstevel@tonic-gate 		else if (estate == cpu_ec_state_exl)
3090*0Sstevel@tonic-gate 			state_str = "Exclusive";
3091*0Sstevel@tonic-gate 		else if (estate == cpu_ec_state_own)
3092*0Sstevel@tonic-gate 			state_str = "Owner";
3093*0Sstevel@tonic-gate 		else if (estate == cpu_ec_state_mod)
3094*0Sstevel@tonic-gate 			state_str = "Modified";
3095*0Sstevel@tonic-gate 		else
3096*0Sstevel@tonic-gate 			state_str = "Invalid";
3097*0Sstevel@tonic-gate 
3098*0Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 1) {
3099*0Sstevel@tonic-gate 			(void) snprintf(linestr, sizeof (linestr),
3100*0Sstevel@tonic-gate 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3101*0Sstevel@tonic-gate 		} else {
3102*0Sstevel@tonic-gate 			linestr[0] = '\0';
3103*0Sstevel@tonic-gate 		}
3104*0Sstevel@tonic-gate 
3105*0Sstevel@tonic-gate 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3106*0Sstevel@tonic-gate 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3107*0Sstevel@tonic-gate 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3108*0Sstevel@tonic-gate 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3109*0Sstevel@tonic-gate 		    (uint32_t)ecache_tag, state_str,
3110*0Sstevel@tonic-gate 		    (uint32_t)eparity, linestr);
3111*0Sstevel@tonic-gate 	} else {
3112*0Sstevel@tonic-gate 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3113*0Sstevel@tonic-gate 		    " E$tag != PA from AFAR; E$line was victimized"
3114*0Sstevel@tonic-gate 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3115*0Sstevel@tonic-gate 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3116*0Sstevel@tonic-gate 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3117*0Sstevel@tonic-gate 	}
3118*0Sstevel@tonic-gate 
3119*0Sstevel@tonic-gate 	/*
3120*0Sstevel@tonic-gate 	 * Dump out all 8 8-byte ecache data chunks captured.
3121*0Sstevel@tonic-gate 	 * For each 8-byte chunk captured, we check the
3122*0Sstevel@tonic-gate 	 * captured afsr's parity syndrome to find out
3123*0Sstevel@tonic-gate 	 * which 8-byte chunk is bad.  For a memory dump, the
3124*0Sstevel@tonic-gate 	 * AFSR values were initialized to 0.
3125*0Sstevel@tonic-gate 	 */
3126*0Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
3127*0Sstevel@tonic-gate 		ec_data_t *ecdptr;
3128*0Sstevel@tonic-gate 		uint_t offset;
3129*0Sstevel@tonic-gate 		ushort_t psynd;
3130*0Sstevel@tonic-gate 		ushort_t bad;
3131*0Sstevel@tonic-gate 		uint64_t edp;
3132*0Sstevel@tonic-gate 
3133*0Sstevel@tonic-gate 		offset = i << 3;	/* multiply by 8 */
3134*0Sstevel@tonic-gate 		ecdptr = &spf_flt->flt_ec_data[i];
3135*0Sstevel@tonic-gate 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3136*0Sstevel@tonic-gate 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3137*0Sstevel@tonic-gate 
3138*0Sstevel@tonic-gate 		/*
3139*0Sstevel@tonic-gate 		 * For Sabre/Hummingbird, parity synd is captured only
3140*0Sstevel@tonic-gate 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3141*0Sstevel@tonic-gate 		 * For spitfire/blackbird, AFSR.PSYND is captured
3142*0Sstevel@tonic-gate 		 * in 16-byte granularity. [15:8] represent
3143*0Sstevel@tonic-gate 		 * the upper 8 byte and [7:0] the lower 8 byte.
3144*0Sstevel@tonic-gate 		 */
3145*0Sstevel@tonic-gate 		if (isus2i || isus2e || (i & 0x1))
3146*0Sstevel@tonic-gate 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3147*0Sstevel@tonic-gate 		else
3148*0Sstevel@tonic-gate 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3149*0Sstevel@tonic-gate 
3150*0Sstevel@tonic-gate 		if (bad && edp) {
3151*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3152*0Sstevel@tonic-gate 			    " E$Data (0x%02x): 0x%08x.%08x "
3153*0Sstevel@tonic-gate 			    "*Bad* PSYND=0x%04x", offset,
3154*0Sstevel@tonic-gate 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3155*0Sstevel@tonic-gate 			    (uint32_t)ecdptr->ec_d8, psynd);
3156*0Sstevel@tonic-gate 		} else {
3157*0Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3158*0Sstevel@tonic-gate 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3159*0Sstevel@tonic-gate 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3160*0Sstevel@tonic-gate 			    (uint32_t)ecdptr->ec_d8);
3161*0Sstevel@tonic-gate 		}
3162*0Sstevel@tonic-gate 	}
3163*0Sstevel@tonic-gate }
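/*
 * A sample line from the dump loop above might read (data values
 * hypothetical):
 *
 *	 E$Data (0x10): 0x12345678.9abcdef0 *Bad* PSYND=0x0100
 *
 * i.e. the 8-byte chunk at offset 0x10 had an EDP, and as the even half
 * of its 16-byte pair its parity syndrome sits in PSYND bits [15:8].
 */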
3164*0Sstevel@tonic-gate 
3165*0Sstevel@tonic-gate /*
3166*0Sstevel@tonic-gate  * Common logging function for all cpu async errors.  This function allows the
3167*0Sstevel@tonic-gate  * caller to generate a single cmn_err() call that logs the appropriate items
3168*0Sstevel@tonic-gate  * from the fault structure, and implements our rules for AFT logging levels.
3169*0Sstevel@tonic-gate  *
3170*0Sstevel@tonic-gate  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3171*0Sstevel@tonic-gate  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3172*0Sstevel@tonic-gate  *	spflt: pointer to spitfire async fault structure
3173*0Sstevel@tonic-gate  *	logflags: bitflags indicating what to output
3174*0Sstevel@tonic-gate  *	endstr: an end string to appear at the end of this log
3175*0Sstevel@tonic-gate  *	fmt: a format string to appear at the beginning of the log
3176*0Sstevel@tonic-gate  *
3177*0Sstevel@tonic-gate  * The logflags allows the construction of predetermined output from the spflt
3178*0Sstevel@tonic-gate  * structure.  The individual data items always appear in a consistent order.
3179*0Sstevel@tonic-gate  * Note that either or both of the spflt structure pointer and logflags may be
3180*0Sstevel@tonic-gate  * NULL or zero respectively, indicating that the predetermined output
3181*0Sstevel@tonic-gate  * substrings are not requested in this log.  The output looks like this:
3182*0Sstevel@tonic-gate  *
3183*0Sstevel@tonic-gate  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3184*0Sstevel@tonic-gate  *	<CPU_SPACE><CPU_ERRID>
3185*0Sstevel@tonic-gate  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3186*0Sstevel@tonic-gate  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3187*0Sstevel@tonic-gate  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3188*0Sstevel@tonic-gate  *	newline+4spaces<CPU_SYND>
3189*0Sstevel@tonic-gate  *	newline+4spaces<endstr>
3190*0Sstevel@tonic-gate  *
3191*0Sstevel@tonic-gate  * Note that <endstr> may not start on a newline if we are logging <CPU_SYND>;
3192*0Sstevel@tonic-gate  * it is assumed that <endstr> will be the unum string in this case.  The size
3193*0Sstevel@tonic-gate  * of our intermediate formatting buf[] is based on the worst case of all flags
3194*0Sstevel@tonic-gate  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3195*0Sstevel@tonic-gate  * formatting so we don't need additional stack space to format them here.
3196*0Sstevel@tonic-gate  */
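
/*
 * A representative call (illustrative only; "unum" stands for a memory
 * module name string obtained elsewhere, while the logflags shown are the
 * real flags used throughout this file):
 *
 *	cpu_aflt_log(CE_WARN, 1, spf_flt,
 *	    CPU_ERRID_FIRST | CPU_FLTCPU | CPU_AFSR | CPU_AFAR | CPU_SYND,
 *	    unum, " Corrected Memory Error on");
 */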
3197*0Sstevel@tonic-gate /*PRINTFLIKE6*/
3198*0Sstevel@tonic-gate static void
3199*0Sstevel@tonic-gate cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3200*0Sstevel@tonic-gate 	const char *endstr, const char *fmt, ...)
3201*0Sstevel@tonic-gate {
3202*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spflt;
3203*0Sstevel@tonic-gate 	char buf[400], *p, *q; /* see comments about buf[] size above */
3204*0Sstevel@tonic-gate 	va_list ap;
3205*0Sstevel@tonic-gate 	int console_log_flag;
3206*0Sstevel@tonic-gate 
3207*0Sstevel@tonic-gate 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3208*0Sstevel@tonic-gate 				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
3209*0Sstevel@tonic-gate 	    (aflt->flt_panic)) {
3210*0Sstevel@tonic-gate 		console_log_flag = (tagnum < 2) || aft_verbose;
3211*0Sstevel@tonic-gate 	} else {
3212*0Sstevel@tonic-gate 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3213*0Sstevel@tonic-gate 		    (aflt->flt_stat & P_AFSR_CE)) ?
3214*0Sstevel@tonic-gate 		    ce_verbose_memory : ce_verbose_other;
3215*0Sstevel@tonic-gate 
3216*0Sstevel@tonic-gate 		if (!verbose)
3217*0Sstevel@tonic-gate 			return;
3218*0Sstevel@tonic-gate 
3219*0Sstevel@tonic-gate 		console_log_flag = (verbose > 1);
3220*0Sstevel@tonic-gate 	}
3221*0Sstevel@tonic-gate 
3222*0Sstevel@tonic-gate 	if (console_log_flag)
3223*0Sstevel@tonic-gate 		(void) sprintf(buf, "[AFT%d]", tagnum);
3224*0Sstevel@tonic-gate 	else
3225*0Sstevel@tonic-gate 		(void) sprintf(buf, "![AFT%d]", tagnum);
3226*0Sstevel@tonic-gate 
3227*0Sstevel@tonic-gate 	p = buf + strlen(buf);	/* current buffer position */
3228*0Sstevel@tonic-gate 	q = buf + sizeof (buf);	/* pointer past end of buffer */
3229*0Sstevel@tonic-gate 
3230*0Sstevel@tonic-gate 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3231*0Sstevel@tonic-gate 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3232*0Sstevel@tonic-gate 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3233*0Sstevel@tonic-gate 		p += strlen(p);
3234*0Sstevel@tonic-gate 	}
3235*0Sstevel@tonic-gate 
3236*0Sstevel@tonic-gate 	/*
3237*0Sstevel@tonic-gate 	 * Copy the caller's format string verbatim into buf[].  It will be
3238*0Sstevel@tonic-gate 	 * formatted by the call to vcmn_err() at the end of this function.
3239*0Sstevel@tonic-gate 	 */
3240*0Sstevel@tonic-gate 	if (fmt != NULL && p < q) {
3241*0Sstevel@tonic-gate 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3242*0Sstevel@tonic-gate 		buf[sizeof (buf) - 1] = '\0';
3243*0Sstevel@tonic-gate 		p += strlen(p);
3244*0Sstevel@tonic-gate 	}
3245*0Sstevel@tonic-gate 
3246*0Sstevel@tonic-gate 	if (spflt != NULL) {
3247*0Sstevel@tonic-gate 		if (logflags & CPU_FLTCPU) {
3248*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3249*0Sstevel@tonic-gate 			    aflt->flt_inst);
3250*0Sstevel@tonic-gate 			p += strlen(p);
3251*0Sstevel@tonic-gate 		}
3252*0Sstevel@tonic-gate 
3253*0Sstevel@tonic-gate 		if (logflags & CPU_SPACE) {
3254*0Sstevel@tonic-gate 			if (aflt->flt_status & ECC_D_TRAP)
3255*0Sstevel@tonic-gate 				(void) snprintf(p, (size_t)(q - p),
3256*0Sstevel@tonic-gate 				    " Data access");
3257*0Sstevel@tonic-gate 			else if (aflt->flt_status & ECC_I_TRAP)
3258*0Sstevel@tonic-gate 				(void) snprintf(p, (size_t)(q - p),
3259*0Sstevel@tonic-gate 				    " Instruction access");
3260*0Sstevel@tonic-gate 			p += strlen(p);
3261*0Sstevel@tonic-gate 		}
3262*0Sstevel@tonic-gate 
3263*0Sstevel@tonic-gate 		if (logflags & CPU_TL) {
3264*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3265*0Sstevel@tonic-gate 			    aflt->flt_tl ? ">0" : "=0");
3266*0Sstevel@tonic-gate 			p += strlen(p);
3267*0Sstevel@tonic-gate 		}
3268*0Sstevel@tonic-gate 
3269*0Sstevel@tonic-gate 		if (logflags & CPU_ERRID) {
3270*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3271*0Sstevel@tonic-gate 			    ", errID 0x%08x.%08x",
3272*0Sstevel@tonic-gate 			    (uint32_t)(aflt->flt_id >> 32),
3273*0Sstevel@tonic-gate 			    (uint32_t)aflt->flt_id);
3274*0Sstevel@tonic-gate 			p += strlen(p);
3275*0Sstevel@tonic-gate 		}
3276*0Sstevel@tonic-gate 
3277*0Sstevel@tonic-gate 		if (logflags & CPU_AFSR) {
3278*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3279*0Sstevel@tonic-gate 			    "\n    AFSR 0x%08b.%08b",
3280*0Sstevel@tonic-gate 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3281*0Sstevel@tonic-gate 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3282*0Sstevel@tonic-gate 			p += strlen(p);
3283*0Sstevel@tonic-gate 		}
3284*0Sstevel@tonic-gate 
3285*0Sstevel@tonic-gate 		if (logflags & CPU_AFAR) {
3286*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3287*0Sstevel@tonic-gate 			    (uint32_t)(aflt->flt_addr >> 32),
3288*0Sstevel@tonic-gate 			    (uint32_t)aflt->flt_addr);
3289*0Sstevel@tonic-gate 			p += strlen(p);
3290*0Sstevel@tonic-gate 		}
3291*0Sstevel@tonic-gate 
3292*0Sstevel@tonic-gate 		if (logflags & CPU_AF_PSYND) {
3293*0Sstevel@tonic-gate 			ushort_t psynd = (ushort_t)
3294*0Sstevel@tonic-gate 			    (aflt->flt_stat & P_AFSR_P_SYND);
3295*0Sstevel@tonic-gate 
3296*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3297*0Sstevel@tonic-gate 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3298*0Sstevel@tonic-gate 			    psynd, ecc_psynd_score(psynd));
3299*0Sstevel@tonic-gate 			p += strlen(p);
3300*0Sstevel@tonic-gate 		}
3301*0Sstevel@tonic-gate 
3302*0Sstevel@tonic-gate 		if (logflags & CPU_AF_ETS) {
3303*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3304*0Sstevel@tonic-gate 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3305*0Sstevel@tonic-gate 			p += strlen(p);
3306*0Sstevel@tonic-gate 		}
3307*0Sstevel@tonic-gate 
3308*0Sstevel@tonic-gate 		if (logflags & CPU_FAULTPC) {
3309*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3310*0Sstevel@tonic-gate 			    (void *)aflt->flt_pc);
3311*0Sstevel@tonic-gate 			p += strlen(p);
3312*0Sstevel@tonic-gate 		}
3313*0Sstevel@tonic-gate 
3314*0Sstevel@tonic-gate 		if (logflags & CPU_UDBH) {
3315*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3316*0Sstevel@tonic-gate 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
3317*0Sstevel@tonic-gate 			    spflt->flt_sdbh, UDB_FMTSTR,
3318*0Sstevel@tonic-gate 			    spflt->flt_sdbh & 0xFF);
3319*0Sstevel@tonic-gate 			p += strlen(p);
3320*0Sstevel@tonic-gate 		}
3321*0Sstevel@tonic-gate 
3322*0Sstevel@tonic-gate 		if (logflags & CPU_UDBL) {
3323*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3324*0Sstevel@tonic-gate 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
3325*0Sstevel@tonic-gate 			    spflt->flt_sdbl, UDB_FMTSTR,
3326*0Sstevel@tonic-gate 			    spflt->flt_sdbl & 0xFF);
3327*0Sstevel@tonic-gate 			p += strlen(p);
3328*0Sstevel@tonic-gate 		}
3329*0Sstevel@tonic-gate 
3330*0Sstevel@tonic-gate 		if (logflags & CPU_SYND) {
3331*0Sstevel@tonic-gate 			ushort_t synd = SYND(aflt->flt_synd);
3332*0Sstevel@tonic-gate 
3333*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p),
3334*0Sstevel@tonic-gate 			    "\n    %s Syndrome 0x%x Memory Module ",
3335*0Sstevel@tonic-gate 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3336*0Sstevel@tonic-gate 			p += strlen(p);
3337*0Sstevel@tonic-gate 		}
3338*0Sstevel@tonic-gate 	}
3339*0Sstevel@tonic-gate 
3340*0Sstevel@tonic-gate 	if (endstr != NULL) {
3341*0Sstevel@tonic-gate 		if (!(logflags & CPU_SYND))
3342*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3343*0Sstevel@tonic-gate 		else
3344*0Sstevel@tonic-gate 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3345*0Sstevel@tonic-gate 		p += strlen(p);
3346*0Sstevel@tonic-gate 	}
3347*0Sstevel@tonic-gate 
3348*0Sstevel@tonic-gate 	if (ce_code == CE_CONT && (p < q - 1))
3349*0Sstevel@tonic-gate 		(void) strcpy(p, "\n"); /* add final \n if needed */
3350*0Sstevel@tonic-gate 
3351*0Sstevel@tonic-gate 	va_start(ap, fmt);
3352*0Sstevel@tonic-gate 	vcmn_err(ce_code, buf, ap);
3353*0Sstevel@tonic-gate 	va_end(ap);
3354*0Sstevel@tonic-gate }
3355*0Sstevel@tonic-gate 
3356*0Sstevel@tonic-gate /*
3357*0Sstevel@tonic-gate  * Ecache Scrubbing
3358*0Sstevel@tonic-gate  *
3359*0Sstevel@tonic-gate  * The basic idea is to prevent lines from sitting in the ecache long enough
3360*0Sstevel@tonic-gate  * to build up soft errors which can lead to ecache parity errors.
3361*0Sstevel@tonic-gate  *
3362*0Sstevel@tonic-gate  * The following rules are observed when flushing the ecache:
3363*0Sstevel@tonic-gate  *
3364*0Sstevel@tonic-gate  * 1. When the system is busy, flush bad clean lines
3365*0Sstevel@tonic-gate  * 2. When the system is idle, flush all clean lines
3366*0Sstevel@tonic-gate  * 3. When the system is idle, flush good dirty lines
3367*0Sstevel@tonic-gate  * 4. Never flush bad dirty lines.
3368*0Sstevel@tonic-gate  *
3369*0Sstevel@tonic-gate  *	modify	parity	busy   idle
3370*0Sstevel@tonic-gate  *	----------------------------
3371*0Sstevel@tonic-gate  *	clean	good		X
3372*0Sstevel@tonic-gate  * 	clean	bad	X	X
3373*0Sstevel@tonic-gate  * 	dirty	good		X
3374*0Sstevel@tonic-gate  *	dirty	bad
3375*0Sstevel@tonic-gate  *
3376*0Sstevel@tonic-gate  * Bad or good refers to whether a line has an E$ parity error or not.
3377*0Sstevel@tonic-gate  * Clean or dirty refers to the state of the modified bit.  We currently
3378*0Sstevel@tonic-gate  * default the scan rate to 100 (scan 10% of the cache per second).
3379*0Sstevel@tonic-gate  *
3380*0Sstevel@tonic-gate  * The following are E$ states and actions.
3381*0Sstevel@tonic-gate  *
3382*0Sstevel@tonic-gate  * We encode our state as a 3-bit number, consisting of:
3383*0Sstevel@tonic-gate  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3384*0Sstevel@tonic-gate  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3385*0Sstevel@tonic-gate  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3386*0Sstevel@tonic-gate  *
3387*0Sstevel@tonic-gate  * We associate a flushing and a logging action with each state.
3388*0Sstevel@tonic-gate  *
3389*0Sstevel@tonic-gate  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3390*0Sstevel@tonic-gate  * MIRROR_FLUSH indicates that an E$ line will be flushed on mirrored
3391*0Sstevel@tonic-gate  * E$ only; the other ec_flush values apply to all E$ types.
3392*0Sstevel@tonic-gate  */
3393*0Sstevel@tonic-gate 
3394*0Sstevel@tonic-gate #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3395*0Sstevel@tonic-gate #define	NEVER_FLUSH		0x0	/* never flush the E$ line */
3396*0Sstevel@tonic-gate #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3397*0Sstevel@tonic-gate 
3398*0Sstevel@tonic-gate struct {
3399*0Sstevel@tonic-gate 	char	ec_flush;		/* whether to flush or not */
3400*0Sstevel@tonic-gate 	char	ec_log;			/* ecache logging */
3401*0Sstevel@tonic-gate 	char	ec_log_type;		/* log type info */
3402*0Sstevel@tonic-gate } ec_action[] = {	/* states of the E$ line in M P B */
3403*0Sstevel@tonic-gate 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3404*0Sstevel@tonic-gate 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3405*0Sstevel@tonic-gate 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3406*0Sstevel@tonic-gate 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3407*0Sstevel@tonic-gate 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3408*0Sstevel@tonic-gate 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3409*0Sstevel@tonic-gate 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3410*0Sstevel@tonic-gate 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3411*0Sstevel@tonic-gate };
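
/*
 * Worked example of the M/P/B encoding: a dirty line with bad parity on
 * a busy CPU has mpb = ECACHE_STATE_MODIFIED | ECACHE_STATE_PARITY |
 * ECACHE_STATE_BUSY = 0x7, so ec_action[7] applies: never flush the
 * line, and log it as CPU_BADLINE_DB_ERR.
 */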
3412*0Sstevel@tonic-gate 
3413*0Sstevel@tonic-gate /*
3414*0Sstevel@tonic-gate  * Offsets into ec_action[] that identify the clean_good_busy and
3415*0Sstevel@tonic-gate  * dirty_good_busy lines.
3416*0Sstevel@tonic-gate  */
3417*0Sstevel@tonic-gate #define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
3418*0Sstevel@tonic-gate #define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */
3419*0Sstevel@tonic-gate 
3420*0Sstevel@tonic-gate /*
3421*0Sstevel@tonic-gate  * We conditionally flush lines that are Clean_Good_Busy or Dirty_Good_Busy,
3422*0Sstevel@tonic-gate  * and only do so for non-mirrored E$.
3423*0Sstevel@tonic-gate  */
3424*0Sstevel@tonic-gate #define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
3425*0Sstevel@tonic-gate #define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
3426*0Sstevel@tonic-gate 
3427*0Sstevel@tonic-gate #define	ECACHE_STATE_MODIFIED	0x4
3428*0Sstevel@tonic-gate #define	ECACHE_STATE_PARITY	0x2
3429*0Sstevel@tonic-gate #define	ECACHE_STATE_BUSY	0x1
3430*0Sstevel@tonic-gate 
3431*0Sstevel@tonic-gate /*
3432*0Sstevel@tonic-gate  * If the ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced.
3433*0Sstevel@tonic-gate  */
3434*0Sstevel@tonic-gate int ecache_calls_a_sec_mirrored = 1;
3435*0Sstevel@tonic-gate int ecache_lines_per_call_mirrored = 1;
3436*0Sstevel@tonic-gate 
3437*0Sstevel@tonic-gate int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3438*0Sstevel@tonic-gate int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3439*0Sstevel@tonic-gate int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3440*0Sstevel@tonic-gate int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3441*0Sstevel@tonic-gate int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3442*0Sstevel@tonic-gate int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3443*0Sstevel@tonic-gate int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3444*0Sstevel@tonic-gate int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3445*0Sstevel@tonic-gate 
3446*0Sstevel@tonic-gate volatile int ec_timeout_calls = 1;	/* timeout calls */
3447*0Sstevel@tonic-gate 
3448*0Sstevel@tonic-gate /*
3449*0Sstevel@tonic-gate  * Interrupt number and pil for ecache scrubber cross-trap calls.
3450*0Sstevel@tonic-gate  */
3451*0Sstevel@tonic-gate static uint_t ecache_scrub_inum;
3452*0Sstevel@tonic-gate uint_t ecache_scrub_pil = PIL_9;
3453*0Sstevel@tonic-gate 
3454*0Sstevel@tonic-gate /*
3455*0Sstevel@tonic-gate  * Kstats for the E$ scrubber.
3456*0Sstevel@tonic-gate  */
3457*0Sstevel@tonic-gate typedef struct ecache_kstat {
3458*0Sstevel@tonic-gate 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3459*0Sstevel@tonic-gate 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3460*0Sstevel@tonic-gate 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3461*0Sstevel@tonic-gate 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3462*0Sstevel@tonic-gate 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3463*0Sstevel@tonic-gate 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3464*0Sstevel@tonic-gate 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3465*0Sstevel@tonic-gate 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3466*0Sstevel@tonic-gate 	kstat_named_t invalid_lines;		/* # of invalid lines */
3467*0Sstevel@tonic-gate 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3468*0Sstevel@tonic-gate 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3469*0Sstevel@tonic-gate 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3470*0Sstevel@tonic-gate } ecache_kstat_t;
3471*0Sstevel@tonic-gate 
3472*0Sstevel@tonic-gate static ecache_kstat_t ec_kstat_template = {
3473*0Sstevel@tonic-gate 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3474*0Sstevel@tonic-gate 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3475*0Sstevel@tonic-gate 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3476*0Sstevel@tonic-gate 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3477*0Sstevel@tonic-gate 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3478*0Sstevel@tonic-gate 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3479*0Sstevel@tonic-gate 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3480*0Sstevel@tonic-gate 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3481*0Sstevel@tonic-gate 	{ "invalid_lines", KSTAT_DATA_ULONG },
3482*0Sstevel@tonic-gate 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3483*0Sstevel@tonic-gate 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3484*0Sstevel@tonic-gate 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3485*0Sstevel@tonic-gate };
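
/*
 * The per-CPU scrubber counters defined above can be read from userland
 * with kstat(1M), e.g. "kstat -m unix -n ecache_kstat" (illustrative).
 */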
3486*0Sstevel@tonic-gate 
3489*0Sstevel@tonic-gate /*
3490*0Sstevel@tonic-gate  * Called periodically on each CPU to scan the ecache once a second,
3491*0Sstevel@tonic-gate  * adjusting the ecache line index appropriately.
3492*0Sstevel@tonic-gate  */
3493*0Sstevel@tonic-gate void
3494*0Sstevel@tonic-gate scrub_ecache_line(void)
3495*0Sstevel@tonic-gate {
3496*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3497*0Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
3498*0Sstevel@tonic-gate 	uint32_t index = ssmp->ecache_flush_index;
3499*0Sstevel@tonic-gate 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3500*0Sstevel@tonic-gate 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3501*0Sstevel@tonic-gate 	int nlines = ssmp->ecache_nlines;
3502*0Sstevel@tonic-gate 	uint32_t ec_set_size = ec_size / ecache_associativity;
3503*0Sstevel@tonic-gate 	int ec_mirror = ssmp->ecache_mirror;
3504*0Sstevel@tonic-gate 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3505*0Sstevel@tonic-gate 
3506*0Sstevel@tonic-gate 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3507*0Sstevel@tonic-gate 	int mpb;		/* encode Modified, Parity, Busy for action */
3508*0Sstevel@tonic-gate 	uchar_t state;
3509*0Sstevel@tonic-gate 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3510*0Sstevel@tonic-gate 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3511*0Sstevel@tonic-gate 	ec_data_t ec_data[8];
3512*0Sstevel@tonic-gate 	kstat_named_t *ec_knp;
3513*0Sstevel@tonic-gate 
3514*0Sstevel@tonic-gate 	switch (ec_mirror) {
3515*0Sstevel@tonic-gate 		default:
3516*0Sstevel@tonic-gate 		case ECACHE_CPU_NON_MIRROR:
3517*0Sstevel@tonic-gate 			/*
3518*0Sstevel@tonic-gate 			 * The E$ scan rate is expressed in units of tenths of
3519*0Sstevel@tonic-gate 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3520*0Sstevel@tonic-gate 			 * whole cache is scanned every second.
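			 *
			 * Worked example (illustrative): a 4MB E$ with
			 * 64-byte lines has nlines = 65536; with the
			 * defaults ecache_scan_rate = 100 and
			 * ecache_calls_a_sec = 100, scan_lines =
			 * (65536 * 100) / (1000 * 100) = 65 lines per
			 * call, i.e. ~6500 lines (~10% of the cache)
			 * per second.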
3521*0Sstevel@tonic-gate 			 */
3522*0Sstevel@tonic-gate 			scan_lines = (nlines * ecache_scan_rate) /
3523*0Sstevel@tonic-gate 					(1000 * ecache_calls_a_sec);
3524*0Sstevel@tonic-gate 			if (!(ssmp->ecache_busy)) {
3525*0Sstevel@tonic-gate 				if (ecache_idle_factor > 0) {
3526*0Sstevel@tonic-gate 					scan_lines *= ecache_idle_factor;
3527*0Sstevel@tonic-gate 				}
3528*0Sstevel@tonic-gate 			} else {
3529*0Sstevel@tonic-gate 				flush_clean_busy = (scan_lines *
3530*0Sstevel@tonic-gate 					ecache_flush_clean_good_busy) / 100;
3531*0Sstevel@tonic-gate 				flush_dirty_busy = (scan_lines *
3532*0Sstevel@tonic-gate 					ecache_flush_dirty_good_busy) / 100;
3533*0Sstevel@tonic-gate 			}
3534*0Sstevel@tonic-gate 
3535*0Sstevel@tonic-gate 			ec_timeout_calls = (ecache_calls_a_sec ?
3536*0Sstevel@tonic-gate 						ecache_calls_a_sec : 1);
3537*0Sstevel@tonic-gate 			break;
3538*0Sstevel@tonic-gate 
3539*0Sstevel@tonic-gate 		case ECACHE_CPU_MIRROR:
3540*0Sstevel@tonic-gate 			scan_lines = ecache_lines_per_call_mirrored;
3541*0Sstevel@tonic-gate 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3542*0Sstevel@tonic-gate 					ecache_calls_a_sec_mirrored : 1);
3543*0Sstevel@tonic-gate 			break;
3544*0Sstevel@tonic-gate 	}
3545*0Sstevel@tonic-gate 
3546*0Sstevel@tonic-gate 	/*
3547*0Sstevel@tonic-gate 	 * The ecache scrubber algorithm operates by reading and
3548*0Sstevel@tonic-gate 	 * decoding the E$ tag to determine whether the corresponding E$ line
3549*0Sstevel@tonic-gate 	 * can be scrubbed. There is an implicit assumption in the scrubber
3550*0Sstevel@tonic-gate 	 * logic that the E$ tag is valid. Unfortunately, that assumption is
3551*0Sstevel@tonic-gate 	 * flawed, since the E$ tag may itself be corrupted and have parity
3552*0Sstevel@tonic-gate 	 * errors. The scrubber logic is therefore enhanced to check the
3553*0Sstevel@tonic-gate 	 * validity of the E$ tag before scrubbing. When a parity error is
3554*0Sstevel@tonic-gate 	 * detected in the E$ tag, it is possible to recover and scrub the
3555*0Sstevel@tonic-gate 	 * tag under certain conditions so that an ETP error can be avoided.
3556*0Sstevel@tonic-gate 	 */
3557*0Sstevel@tonic-gate 
3558*0Sstevel@tonic-gate 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3559*0Sstevel@tonic-gate 		/*
3560*0Sstevel@tonic-gate 		 * We get the old-AFSR before clearing the AFSR sticky bits
3561*0Sstevel@tonic-gate 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3562*0Sstevel@tonic-gate 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3563*0Sstevel@tonic-gate 		 */
3564*0Sstevel@tonic-gate 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3565*0Sstevel@tonic-gate 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3566*0Sstevel@tonic-gate 				cpu_ec_state_shift);
3567*0Sstevel@tonic-gate 
3568*0Sstevel@tonic-gate 		/*
3569*0Sstevel@tonic-gate 		 * If ETP is set, try to scrub the ecache tag.
3570*0Sstevel@tonic-gate 		 */
3571*0Sstevel@tonic-gate 		if (nafsr & P_AFSR_ETP) {
3572*0Sstevel@tonic-gate 			ecache_scrub_tag_err(nafsr, state, index);
3573*0Sstevel@tonic-gate 		} else if (state & cpu_ec_state_valid) {
3574*0Sstevel@tonic-gate 			/*
3575*0Sstevel@tonic-gate 			 * ETP is not set, E$ tag is valid.
3576*0Sstevel@tonic-gate 			 * Proceed with the E$ scrubbing.
3577*0Sstevel@tonic-gate 			 */
3578*0Sstevel@tonic-gate 			if (state & cpu_ec_state_dirty)
3579*0Sstevel@tonic-gate 				mpb |= ECACHE_STATE_MODIFIED;
3580*0Sstevel@tonic-gate 
3581*0Sstevel@tonic-gate 			tafsr = check_ecache_line(index, acc_afsr);
3582*0Sstevel@tonic-gate 
3583*0Sstevel@tonic-gate 			if (tafsr & P_AFSR_EDP) {
3584*0Sstevel@tonic-gate 				mpb |= ECACHE_STATE_PARITY;
3585*0Sstevel@tonic-gate 
3586*0Sstevel@tonic-gate 				if (ecache_scrub_verbose ||
3587*0Sstevel@tonic-gate 							ecache_scrub_panic) {
3588*0Sstevel@tonic-gate 					get_ecache_dtag(P2ALIGN(index, 64),
3589*0Sstevel@tonic-gate 						(uint64_t *)&ec_data[0],
3590*0Sstevel@tonic-gate 						&ec_tag, &oafsr, acc_afsr);
3591*0Sstevel@tonic-gate 				}
3592*0Sstevel@tonic-gate 			}
3593*0Sstevel@tonic-gate 
3594*0Sstevel@tonic-gate 			if (ssmp->ecache_busy)
3595*0Sstevel@tonic-gate 				mpb |= ECACHE_STATE_BUSY;
3596*0Sstevel@tonic-gate 
3597*0Sstevel@tonic-gate 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3598*0Sstevel@tonic-gate 			ec_knp->value.ul++;
3599*0Sstevel@tonic-gate 
3600*0Sstevel@tonic-gate 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3601*0Sstevel@tonic-gate 				cpu_ec_tag_shift) | (index % ec_set_size);
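
			/*
			 * Note: the E$ tag supplies the high-order physical
			 * address bits, while the line's byte offset within
			 * the set (index % ec_set_size) supplies the
			 * low-order bits.
			 */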
3602*0Sstevel@tonic-gate 
3603*0Sstevel@tonic-gate 			/*
3604*0Sstevel@tonic-gate 			 * We flush the E$ lines depending on ec_flush;
3605*0Sstevel@tonic-gate 			 * for mirrored E$ we additionally flush the
3606*0Sstevel@tonic-gate 			 * clean_good_busy and dirty_good_busy lines.
3607*0Sstevel@tonic-gate 			 */
3608*0Sstevel@tonic-gate 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3609*0Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
3610*0Sstevel@tonic-gate 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3611*0Sstevel@tonic-gate 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3612*0Sstevel@tonic-gate 					flushecacheline(paddr, ec_size);
3613*0Sstevel@tonic-gate 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3614*0Sstevel@tonic-gate 				softcall(ecache_page_retire, (void *)paddr);
3615*0Sstevel@tonic-gate 			}
3616*0Sstevel@tonic-gate 
3617*0Sstevel@tonic-gate 			/*
3618*0Sstevel@tonic-gate 			 * Conditionally flush both the clean_good and
3619*0Sstevel@tonic-gate 			 * dirty_good lines when busy.
3620*0Sstevel@tonic-gate 			 */
3621*0Sstevel@tonic-gate 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3622*0Sstevel@tonic-gate 				flush_clean_busy--;
3623*0Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
3624*0Sstevel@tonic-gate 				ec_ksp->clean_good_busy_flush.value.ul++;
3625*0Sstevel@tonic-gate 			} else if (DGB(mpb, ec_mirror) &&
3626*0Sstevel@tonic-gate 						(flush_dirty_busy > 0)) {
3627*0Sstevel@tonic-gate 				flush_dirty_busy--;
3628*0Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
3629*0Sstevel@tonic-gate 				ec_ksp->dirty_good_busy_flush.value.ul++;
3630*0Sstevel@tonic-gate 			}
3631*0Sstevel@tonic-gate 
3632*0Sstevel@tonic-gate 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3633*0Sstevel@tonic-gate 						ecache_scrub_panic)) {
3634*0Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3635*0Sstevel@tonic-gate 						tafsr);
3636*0Sstevel@tonic-gate 			}
3637*0Sstevel@tonic-gate 
3638*0Sstevel@tonic-gate 		} else {
3639*0Sstevel@tonic-gate 			ec_ksp->invalid_lines.value.ul++;
3640*0Sstevel@tonic-gate 		}
3641*0Sstevel@tonic-gate 
3642*0Sstevel@tonic-gate 		if ((index += ec_linesize) >= ec_size)
3643*0Sstevel@tonic-gate 			index = 0;
3644*0Sstevel@tonic-gate 
3645*0Sstevel@tonic-gate 	}
3646*0Sstevel@tonic-gate 
3647*0Sstevel@tonic-gate 	/*
3648*0Sstevel@tonic-gate 	 * set the ecache scrub index for the next time around
3649*0Sstevel@tonic-gate 	 */
3650*0Sstevel@tonic-gate 	ssmp->ecache_flush_index = index;
3651*0Sstevel@tonic-gate 
3652*0Sstevel@tonic-gate 	if (*acc_afsr & P_AFSR_CP) {
3653*0Sstevel@tonic-gate 		uint64_t ret_afsr;
3654*0Sstevel@tonic-gate 
3655*0Sstevel@tonic-gate 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3656*0Sstevel@tonic-gate 		if ((ret_afsr & P_AFSR_CP) == 0)
3657*0Sstevel@tonic-gate 			*acc_afsr = 0;
3658*0Sstevel@tonic-gate 	}
3659*0Sstevel@tonic-gate }
3660*0Sstevel@tonic-gate 
3661*0Sstevel@tonic-gate /*
3662*0Sstevel@tonic-gate  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3663*0Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
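 *
 * For example, if three cross-trap requests arrive before the softint
 * runs, *countp is 3 and a single pass scrubs on behalf of all three;
 * requests posted while we are scrubbing leave the count non-zero after
 * the atomic decrement, so we loop and scrub again.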
3664*0Sstevel@tonic-gate  */
3665*0Sstevel@tonic-gate 
3666*0Sstevel@tonic-gate /*ARGSUSED*/
3667*0Sstevel@tonic-gate uint_t
3668*0Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3669*0Sstevel@tonic-gate {
3670*0Sstevel@tonic-gate 	int i;
3671*0Sstevel@tonic-gate 	int outstanding;
3672*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3673*0Sstevel@tonic-gate 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3674*0Sstevel@tonic-gate 
3675*0Sstevel@tonic-gate 	do {
3676*0Sstevel@tonic-gate 		outstanding = *countp;
3677*0Sstevel@tonic-gate 		ASSERT(outstanding > 0);
3678*0Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++)
3679*0Sstevel@tonic-gate 			scrub_ecache_line();
3680*0Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
3681*0Sstevel@tonic-gate 
3682*0Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
3683*0Sstevel@tonic-gate }
3684*0Sstevel@tonic-gate 
3685*0Sstevel@tonic-gate /*
3686*0Sstevel@tonic-gate  * force each cpu to perform an ecache scrub, called from a timeout
3687*0Sstevel@tonic-gate  */
3688*0Sstevel@tonic-gate extern xcfunc_t ecache_scrubreq_tl1;
3689*0Sstevel@tonic-gate 
3690*0Sstevel@tonic-gate void
3691*0Sstevel@tonic-gate do_scrub_ecache_line(void)
3692*0Sstevel@tonic-gate {
3693*0Sstevel@tonic-gate 	long delta;
3694*0Sstevel@tonic-gate 
3695*0Sstevel@tonic-gate 	if (ecache_calls_a_sec > hz)
3696*0Sstevel@tonic-gate 		ecache_calls_a_sec = hz;
3697*0Sstevel@tonic-gate 	else if (ecache_calls_a_sec <= 0)
3698*0Sstevel@tonic-gate 		ecache_calls_a_sec = 1;
3699*0Sstevel@tonic-gate 
3700*0Sstevel@tonic-gate 	if (ecache_calls_a_sec_mirrored > hz)
3701*0Sstevel@tonic-gate 		ecache_calls_a_sec_mirrored = hz;
3702*0Sstevel@tonic-gate 	else if (ecache_calls_a_sec_mirrored <= 0)
3703*0Sstevel@tonic-gate 		ecache_calls_a_sec_mirrored = 1;
3704*0Sstevel@tonic-gate 
3705*0Sstevel@tonic-gate 	if (ecache_scrub_enable) {
3706*0Sstevel@tonic-gate 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3707*0Sstevel@tonic-gate 		delta = hz / ec_timeout_calls;
3708*0Sstevel@tonic-gate 	} else {
3709*0Sstevel@tonic-gate 		delta = hz;
3710*0Sstevel@tonic-gate 	}
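
	/*
	 * For example, with hz = 100 and ec_timeout_calls = 100 (the
	 * default ecache_calls_a_sec), delta is 1 tick, i.e. the
	 * scrubbers are cross-called on every clock tick.
	 */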
3711*0Sstevel@tonic-gate 
3712*0Sstevel@tonic-gate 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3713*0Sstevel@tonic-gate 		delta);
3714*0Sstevel@tonic-gate }
3715*0Sstevel@tonic-gate 
3716*0Sstevel@tonic-gate /*
3717*0Sstevel@tonic-gate  * initialization for ecache scrubbing.
3718*0Sstevel@tonic-gate  * This routine is called AFTER all cpus have had cpu_init_private called
3719*0Sstevel@tonic-gate  * to initialize their private data areas.
3720*0Sstevel@tonic-gate  */
3721*0Sstevel@tonic-gate void
3722*0Sstevel@tonic-gate cpu_init_cache_scrub(void)
3723*0Sstevel@tonic-gate {
3724*0Sstevel@tonic-gate 	if (ecache_calls_a_sec > hz) {
3725*0Sstevel@tonic-gate 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3726*0Sstevel@tonic-gate 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3727*0Sstevel@tonic-gate 		ecache_calls_a_sec = hz;
3728*0Sstevel@tonic-gate 	}
3729*0Sstevel@tonic-gate 
3730*0Sstevel@tonic-gate 	/*
3731*0Sstevel@tonic-gate 	 * Register softint for ecache scrubbing.
3732*0Sstevel@tonic-gate 	 */
3733*0Sstevel@tonic-gate 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3734*0Sstevel@tonic-gate 	    scrub_ecache_line_intr, NULL);
3735*0Sstevel@tonic-gate 
3736*0Sstevel@tonic-gate 	/*
3737*0Sstevel@tonic-gate 	 * kick off the scrubbing using realtime timeout
3738*0Sstevel@tonic-gate 	 */
3739*0Sstevel@tonic-gate 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3740*0Sstevel@tonic-gate 	    hz / ecache_calls_a_sec);
3741*0Sstevel@tonic-gate }
3742*0Sstevel@tonic-gate 
3743*0Sstevel@tonic-gate /*
3744*0Sstevel@tonic-gate  * Unset the busy flag for this cpu.
3745*0Sstevel@tonic-gate  */
3746*0Sstevel@tonic-gate void
3747*0Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
3748*0Sstevel@tonic-gate {
3749*0Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
3750*0Sstevel@tonic-gate 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3751*0Sstevel@tonic-gate 							sfpr_scrub_misc);
3752*0Sstevel@tonic-gate 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3753*0Sstevel@tonic-gate 	}
3754*0Sstevel@tonic-gate }
3755*0Sstevel@tonic-gate 
3756*0Sstevel@tonic-gate /*
3757*0Sstevel@tonic-gate  * Set the busy flag for this cpu.
3758*0Sstevel@tonic-gate  */
3759*0Sstevel@tonic-gate void
3760*0Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
3761*0Sstevel@tonic-gate {
3762*0Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
3763*0Sstevel@tonic-gate 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3764*0Sstevel@tonic-gate 							sfpr_scrub_misc);
3765*0Sstevel@tonic-gate 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3766*0Sstevel@tonic-gate 	}
3767*0Sstevel@tonic-gate }
3768*0Sstevel@tonic-gate 
3769*0Sstevel@tonic-gate /*
3770*0Sstevel@tonic-gate  * initialize the ecache scrubber data structures.
3771*0Sstevel@tonic-gate  * The global entry point cpu_init_private replaces this entry point.
3772*0Sstevel@tonic-gate  *
3773*0Sstevel@tonic-gate  */
3774*0Sstevel@tonic-gate static void
3775*0Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
3776*0Sstevel@tonic-gate {
3777*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3778*0Sstevel@tonic-gate 	int cpuid = cp->cpu_id;
3779*0Sstevel@tonic-gate 
3780*0Sstevel@tonic-gate 	/*
3781*0Sstevel@tonic-gate 	 * initialize bookkeeping for cache scrubbing
3782*0Sstevel@tonic-gate 	 */
3783*0Sstevel@tonic-gate 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3784*0Sstevel@tonic-gate 
3785*0Sstevel@tonic-gate 	ssmp->ecache_flush_index = 0;
3786*0Sstevel@tonic-gate 
3787*0Sstevel@tonic-gate 	ssmp->ecache_nlines =
3788*0Sstevel@tonic-gate 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3789*0Sstevel@tonic-gate 
3790*0Sstevel@tonic-gate 	/*
3791*0Sstevel@tonic-gate 	 * Determine whether we are running on mirrored SRAM
3792*0Sstevel@tonic-gate 	 */
3793*0Sstevel@tonic-gate 
3794*0Sstevel@tonic-gate 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3795*0Sstevel@tonic-gate 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3796*0Sstevel@tonic-gate 	else
3797*0Sstevel@tonic-gate 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3798*0Sstevel@tonic-gate 
3799*0Sstevel@tonic-gate 	cpu_busy_ecache_scrub(cp);
3800*0Sstevel@tonic-gate 
3801*0Sstevel@tonic-gate 	/*
3802*0Sstevel@tonic-gate 	 * initialize the kstats
3803*0Sstevel@tonic-gate 	 */
3804*0Sstevel@tonic-gate 	ecache_kstat_init(cp);
3805*0Sstevel@tonic-gate }
3806*0Sstevel@tonic-gate 
3807*0Sstevel@tonic-gate /*
3808*0Sstevel@tonic-gate  * uninitialize the ecache scrubber data structures.
3809*0Sstevel@tonic-gate  * The global entry point cpu_uninit_private replaces this entry point.
3810*0Sstevel@tonic-gate  */
3811*0Sstevel@tonic-gate static void
3812*0Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3813*0Sstevel@tonic-gate {
3814*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3815*0Sstevel@tonic-gate 
3816*0Sstevel@tonic-gate 	if (ssmp->ecache_ksp != NULL) {
3817*0Sstevel@tonic-gate 		kstat_delete(ssmp->ecache_ksp);
3818*0Sstevel@tonic-gate 		ssmp->ecache_ksp = NULL;
3819*0Sstevel@tonic-gate 	}
3820*0Sstevel@tonic-gate 
3821*0Sstevel@tonic-gate 	/*
3822*0Sstevel@tonic-gate 	 * un-initialize bookkeeping for cache scrubbing
3823*0Sstevel@tonic-gate 	 */
3824*0Sstevel@tonic-gate 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3825*0Sstevel@tonic-gate 
3826*0Sstevel@tonic-gate 	cpu_idle_ecache_scrub(cp);
3827*0Sstevel@tonic-gate }
3828*0Sstevel@tonic-gate 
3829*0Sstevel@tonic-gate struct kmem_cache *sf_private_cache;
3830*0Sstevel@tonic-gate 
3831*0Sstevel@tonic-gate /*
3832*0Sstevel@tonic-gate  * Cpu private initialization.  This includes allocating the cpu_private
3833*0Sstevel@tonic-gate  * data structure, initializing it, and initializing the scrubber for this
3834*0Sstevel@tonic-gate  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3835*0Sstevel@tonic-gate  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3836*0Sstevel@tonic-gate  * We use kmem_cache_create for the spitfire private data structure because it
3837*0Sstevel@tonic-gate  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3838*0Sstevel@tonic-gate  */
3839*0Sstevel@tonic-gate void
3840*0Sstevel@tonic-gate cpu_init_private(struct cpu *cp)
3841*0Sstevel@tonic-gate {
3842*0Sstevel@tonic-gate 	spitfire_private_t *sfprp;
3843*0Sstevel@tonic-gate 
3844*0Sstevel@tonic-gate 	ASSERT(CPU_PRIVATE(cp) == NULL);
3845*0Sstevel@tonic-gate 
3846*0Sstevel@tonic-gate 	/*
3847*0Sstevel@tonic-gate 	 * If the sf_private_cache has not been created, create it.
3848*0Sstevel@tonic-gate 	 */
3849*0Sstevel@tonic-gate 	if (sf_private_cache == NULL) {
3850*0Sstevel@tonic-gate 		sf_private_cache = kmem_cache_create("sf_private_cache",
3851*0Sstevel@tonic-gate 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3852*0Sstevel@tonic-gate 			NULL, NULL, NULL, NULL, 0);
3853*0Sstevel@tonic-gate 		ASSERT(sf_private_cache);
3854*0Sstevel@tonic-gate 	}
3855*0Sstevel@tonic-gate 
3856*0Sstevel@tonic-gate 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3857*0Sstevel@tonic-gate 
3858*0Sstevel@tonic-gate 	bzero(sfprp, sizeof (spitfire_private_t));
3859*0Sstevel@tonic-gate 
3860*0Sstevel@tonic-gate 	cpu_init_ecache_scrub_dr(cp);
3861*0Sstevel@tonic-gate }
3862*0Sstevel@tonic-gate 
3863*0Sstevel@tonic-gate /*
3864*0Sstevel@tonic-gate  * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
3865*0Sstevel@tonic-gate  * deallocate the scrubber data structures and cpu_private data structure.
3866*0Sstevel@tonic-gate  * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
3867*0Sstevel@tonic-gate  * the scrubber for the specified cpu.
3868*0Sstevel@tonic-gate  */
3869*0Sstevel@tonic-gate void
3870*0Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp)
3871*0Sstevel@tonic-gate {
3872*0Sstevel@tonic-gate 	ASSERT(CPU_PRIVATE(cp));
3873*0Sstevel@tonic-gate 
3874*0Sstevel@tonic-gate 	cpu_uninit_ecache_scrub_dr(cp);
3875*0Sstevel@tonic-gate 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3876*0Sstevel@tonic-gate 	CPU_PRIVATE(cp) = NULL;
3877*0Sstevel@tonic-gate }
3878*0Sstevel@tonic-gate 
3879*0Sstevel@tonic-gate /*
3880*0Sstevel@tonic-gate  * initialize the ecache kstats for each cpu
3881*0Sstevel@tonic-gate  */
3882*0Sstevel@tonic-gate static void
3883*0Sstevel@tonic-gate ecache_kstat_init(struct cpu *cp)
3884*0Sstevel@tonic-gate {
3885*0Sstevel@tonic-gate 	struct kstat *ksp;
3886*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3887*0Sstevel@tonic-gate 
3888*0Sstevel@tonic-gate 	ASSERT(ssmp != NULL);
3889*0Sstevel@tonic-gate 
3890*0Sstevel@tonic-gate 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3891*0Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
3892*0Sstevel@tonic-gate 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3893*0Sstevel@tonic-gate 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3894*0Sstevel@tonic-gate 		ssmp->ecache_ksp = NULL;
3895*0Sstevel@tonic-gate 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3896*0Sstevel@tonic-gate 		return;
3897*0Sstevel@tonic-gate 	}
3898*0Sstevel@tonic-gate 
3899*0Sstevel@tonic-gate 	ssmp->ecache_ksp = ksp;
3900*0Sstevel@tonic-gate 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3901*0Sstevel@tonic-gate 	kstat_install(ksp);
3902*0Sstevel@tonic-gate }
3903*0Sstevel@tonic-gate 
3904*0Sstevel@tonic-gate /*
3905*0Sstevel@tonic-gate  * log the bad ecache information
3906*0Sstevel@tonic-gate  */
3907*0Sstevel@tonic-gate static void
3908*0Sstevel@tonic-gate ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3909*0Sstevel@tonic-gate 		uint64_t afsr)
3910*0Sstevel@tonic-gate {
3911*0Sstevel@tonic-gate 	spitf_async_flt spf_flt;
3912*0Sstevel@tonic-gate 	struct async_flt *aflt;
3913*0Sstevel@tonic-gate 	int i;
3914*0Sstevel@tonic-gate 	char *class;
3915*0Sstevel@tonic-gate 
3916*0Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
3917*0Sstevel@tonic-gate 	aflt = &spf_flt.cmn_asyncflt;
3918*0Sstevel@tonic-gate 
3919*0Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
3920*0Sstevel@tonic-gate 		spf_flt.flt_ec_data[i] = ec_data[i];
3921*0Sstevel@tonic-gate 	}
3922*0Sstevel@tonic-gate 
3923*0Sstevel@tonic-gate 	spf_flt.flt_ec_tag = ec_tag;
3924*0Sstevel@tonic-gate 
3925*0Sstevel@tonic-gate 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0])))
3926*0Sstevel@tonic-gate 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3927*0Sstevel@tonic-gate 	else
		spf_flt.flt_type = (ushort_t)mpb;
3928*0Sstevel@tonic-gate 
3929*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
3930*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
3931*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
3932*0Sstevel@tonic-gate 	aflt->flt_addr = paddr;
3933*0Sstevel@tonic-gate 	aflt->flt_stat = afsr;
3934*0Sstevel@tonic-gate 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3935*0Sstevel@tonic-gate 
3936*0Sstevel@tonic-gate 	switch (mpb) {
3937*0Sstevel@tonic-gate 	case CPU_ECACHE_TAG_ERR:
3938*0Sstevel@tonic-gate 	case CPU_ECACHE_ADDR_PAR_ERR:
3939*0Sstevel@tonic-gate 	case CPU_ECACHE_ETP_ETS_ERR:
3940*0Sstevel@tonic-gate 	case CPU_ECACHE_STATE_ERR:
3941*0Sstevel@tonic-gate 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3942*0Sstevel@tonic-gate 		break;
3943*0Sstevel@tonic-gate 	default:
3944*0Sstevel@tonic-gate 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3945*0Sstevel@tonic-gate 		break;
3946*0Sstevel@tonic-gate 	}
3947*0Sstevel@tonic-gate 
3948*0Sstevel@tonic-gate 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3949*0Sstevel@tonic-gate 	    ue_queue, aflt->flt_panic);
3950*0Sstevel@tonic-gate 
3951*0Sstevel@tonic-gate 	if (aflt->flt_panic)
3952*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3953*0Sstevel@tonic-gate 		    " line detected");
3954*0Sstevel@tonic-gate }
3955*0Sstevel@tonic-gate 
3956*0Sstevel@tonic-gate /*
3957*0Sstevel@tonic-gate  * Process an ecache error that occurred during the E$ scrubbing.
3958*0Sstevel@tonic-gate  * We do the ecache scan to find the bad line, flush the bad line,
3959*0Sstevel@tonic-gate  * and start the memscrubber to find any UE (in memory or in another cache).
3960*0Sstevel@tonic-gate  */
3961*0Sstevel@tonic-gate static uint64_t
3962*0Sstevel@tonic-gate ecache_scrub_misc_err(int type, uint64_t afsr)
3963*0Sstevel@tonic-gate {
3964*0Sstevel@tonic-gate 	spitf_async_flt spf_flt;
3965*0Sstevel@tonic-gate 	struct async_flt *aflt;
3966*0Sstevel@tonic-gate 	uint64_t oafsr;
3967*0Sstevel@tonic-gate 
3968*0Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
3969*0Sstevel@tonic-gate 	aflt = &spf_flt.cmn_asyncflt;
3970*0Sstevel@tonic-gate 
3971*0Sstevel@tonic-gate 	/*
3972*0Sstevel@tonic-gate 	 * Scan each line in the cache to look for the one
3973*0Sstevel@tonic-gate 	 * with bad parity
3974*0Sstevel@tonic-gate 	 */
3975*0Sstevel@tonic-gate 	aflt->flt_addr = AFLT_INV_ADDR;
3976*0Sstevel@tonic-gate 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3977*0Sstevel@tonic-gate 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3978*0Sstevel@tonic-gate 
3979*0Sstevel@tonic-gate 	if (oafsr & P_AFSR_CP) {
3980*0Sstevel@tonic-gate 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3981*0Sstevel@tonic-gate 		*cp_afsr |= oafsr;
3982*0Sstevel@tonic-gate 	}
3983*0Sstevel@tonic-gate 
3984*0Sstevel@tonic-gate 	/*
3985*0Sstevel@tonic-gate 	 * If we found a bad PA, update the state to indicate if it is
3986*0Sstevel@tonic-gate 	 * memory or I/O space.
3987*0Sstevel@tonic-gate 	 */
3988*0Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3989*0Sstevel@tonic-gate 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3990*0Sstevel@tonic-gate 			MMU_PAGESHIFT)) ? 1 : 0;
3991*0Sstevel@tonic-gate 	}
3992*0Sstevel@tonic-gate 
3993*0Sstevel@tonic-gate 	spf_flt.flt_type = (ushort_t)type;
3994*0Sstevel@tonic-gate 
3995*0Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
3996*0Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
3997*0Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
3998*0Sstevel@tonic-gate 	aflt->flt_status = afsr;
3999*0Sstevel@tonic-gate 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
4000*0Sstevel@tonic-gate 
4001*0Sstevel@tonic-gate 	/*
4002*0Sstevel@tonic-gate 	 * We have the bad line, flush that line and start
4003*0Sstevel@tonic-gate 	 * the memscrubber.
4004*0Sstevel@tonic-gate 	 */
4005*0Sstevel@tonic-gate 	if (spf_flt.flt_ec_lcnt > 0) {
4006*0Sstevel@tonic-gate 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
4007*0Sstevel@tonic-gate 			cpunodes[CPU->cpu_id].ecache_size);
4008*0Sstevel@tonic-gate 		read_all_memscrub = 1;
4009*0Sstevel@tonic-gate 		memscrub_run();
4010*0Sstevel@tonic-gate 	}
4011*0Sstevel@tonic-gate 
4012*0Sstevel@tonic-gate 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
4013*0Sstevel@tonic-gate 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
4014*0Sstevel@tonic-gate 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
4015*0Sstevel@tonic-gate 
4016*0Sstevel@tonic-gate 	return (oafsr);
4017*0Sstevel@tonic-gate }
4018*0Sstevel@tonic-gate 
4019*0Sstevel@tonic-gate static void
4020*0Sstevel@tonic-gate ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4021*0Sstevel@tonic-gate {
4022*0Sstevel@tonic-gate 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4023*0Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4024*0Sstevel@tonic-gate 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4025*0Sstevel@tonic-gate 	uint64_t ec_tag, paddr, oafsr;
4026*0Sstevel@tonic-gate 	ec_data_t ec_data[8];
4027*0Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
4028*0Sstevel@tonic-gate 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4029*0Sstevel@tonic-gate 						ecache_associativity;
4030*0Sstevel@tonic-gate 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4031*0Sstevel@tonic-gate 
4032*0Sstevel@tonic-gate 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4033*0Sstevel@tonic-gate 			&oafsr, cpu_afsr);
4034*0Sstevel@tonic-gate 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4035*0Sstevel@tonic-gate 						(index % ec_set_size);
4036*0Sstevel@tonic-gate 
4037*0Sstevel@tonic-gate 	/*
4038*0Sstevel@tonic-gate 	 * E$ tag state has good parity
4039*0Sstevel@tonic-gate 	 */
4040*0Sstevel@tonic-gate 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4041*0Sstevel@tonic-gate 		if (afsr_ets & cpu_ec_parity) {
4042*0Sstevel@tonic-gate 			/*
4043*0Sstevel@tonic-gate 			 * If the E$ tag state bits indicate the line is
4044*0Sstevel@tonic-gate 			 * clean, invalidate the E$ tag and continue.
4045*0Sstevel@tonic-gate 			 */
4046*0Sstevel@tonic-gate 			if (!(state & cpu_ec_state_dirty)) {
4047*0Sstevel@tonic-gate 				/*
4048*0Sstevel@tonic-gate 				 * Zero the tag and mark the state invalid
4049*0Sstevel@tonic-gate 				 * with good parity for the tag.
4050*0Sstevel@tonic-gate 				 */
4051*0Sstevel@tonic-gate 				if (isus2i || isus2e)
4052*0Sstevel@tonic-gate 					write_hb_ec_tag_parity(index);
4053*0Sstevel@tonic-gate 				else
4054*0Sstevel@tonic-gate 					write_ec_tag_parity(index);
4055*0Sstevel@tonic-gate 
4056*0Sstevel@tonic-gate 				/* Sync with the dual tag */
4057*0Sstevel@tonic-gate 				flushecacheline(0,
4058*0Sstevel@tonic-gate 					cpunodes[CPU->cpu_id].ecache_size);
4059*0Sstevel@tonic-gate 				ec_ksp->tags_cleared.value.ul++;
4060*0Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr,
4061*0Sstevel@tonic-gate 					CPU_ECACHE_TAG_ERR, afsr);
4062*0Sstevel@tonic-gate 				return;
4063*0Sstevel@tonic-gate 			} else {
4064*0Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr,
4065*0Sstevel@tonic-gate 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4066*0Sstevel@tonic-gate 				cmn_err(CE_PANIC, " E$ tag address has bad"
4067*0Sstevel@tonic-gate 							" parity");
4068*0Sstevel@tonic-gate 			}
4069*0Sstevel@tonic-gate 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4070*0Sstevel@tonic-gate 			/*
4071*0Sstevel@tonic-gate 			 * ETS is zero but ETP is set
4072*0Sstevel@tonic-gate 			 */
4073*0Sstevel@tonic-gate 			ecache_scrub_log(ec_data, ec_tag, paddr,
4074*0Sstevel@tonic-gate 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4075*0Sstevel@tonic-gate 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4076*0Sstevel@tonic-gate 				" AFSR.ETS is zero");
4077*0Sstevel@tonic-gate 		}
4078*0Sstevel@tonic-gate 	} else {
4079*0Sstevel@tonic-gate 		/*
4080*0Sstevel@tonic-gate 		 * E$ tag state has bad parity
4081*0Sstevel@tonic-gate 		 */
4082*0Sstevel@tonic-gate 		ecache_scrub_log(ec_data, ec_tag, paddr,
4083*0Sstevel@tonic-gate 				CPU_ECACHE_STATE_ERR, afsr);
4084*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4085*0Sstevel@tonic-gate 	}
4086*0Sstevel@tonic-gate }
4087*0Sstevel@tonic-gate 
4088*0Sstevel@tonic-gate static void
4089*0Sstevel@tonic-gate ecache_page_retire(void *arg)
4090*0Sstevel@tonic-gate {
4091*0Sstevel@tonic-gate 	uint64_t paddr = (uint64_t)arg;
4092*0Sstevel@tonic-gate 	page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT));
4093*0Sstevel@tonic-gate 
4094*0Sstevel@tonic-gate 	if (pp) {
4095*0Sstevel@tonic-gate 		page_settoxic(pp, PAGE_IS_FAULTY);
4096*0Sstevel@tonic-gate 		(void) page_retire(pp, PAGE_IS_TOXIC);
4097*0Sstevel@tonic-gate 	}
4098*0Sstevel@tonic-gate }
4099*0Sstevel@tonic-gate 
4100*0Sstevel@tonic-gate void
4101*0Sstevel@tonic-gate sticksync_slave(void)
4102*0Sstevel@tonic-gate {}
4103*0Sstevel@tonic-gate 
4104*0Sstevel@tonic-gate void
4105*0Sstevel@tonic-gate sticksync_master(void)
4106*0Sstevel@tonic-gate {}
4107*0Sstevel@tonic-gate 
4108*0Sstevel@tonic-gate /*ARGSUSED*/
4109*0Sstevel@tonic-gate void
4110*0Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4111*0Sstevel@tonic-gate {}
4112*0Sstevel@tonic-gate 
4113*0Sstevel@tonic-gate void
4114*0Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4115*0Sstevel@tonic-gate {
4116*0Sstevel@tonic-gate 	int status;
4117*0Sstevel@tonic-gate 	ddi_fm_error_t de;
4118*0Sstevel@tonic-gate 
4119*0Sstevel@tonic-gate 	bzero(&de, sizeof (ddi_fm_error_t));
4120*0Sstevel@tonic-gate 
4121*0Sstevel@tonic-gate 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4122*0Sstevel@tonic-gate 	    FM_ENA_FMT1);
4123*0Sstevel@tonic-gate 	de.fme_flag = expected;
4124*0Sstevel@tonic-gate 	de.fme_bus_specific = (void *)aflt->flt_addr;
4125*0Sstevel@tonic-gate 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4126*0Sstevel@tonic-gate 
4127*0Sstevel@tonic-gate 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4128*0Sstevel@tonic-gate 		aflt->flt_panic = 1;
4129*0Sstevel@tonic-gate }
4130*0Sstevel@tonic-gate 
4131*0Sstevel@tonic-gate /*ARGSUSED*/
4132*0Sstevel@tonic-gate void
4133*0Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4134*0Sstevel@tonic-gate     errorq_t *eqp, uint_t flag)
4135*0Sstevel@tonic-gate {
4136*0Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)payload;
4137*0Sstevel@tonic-gate 
4138*0Sstevel@tonic-gate 	aflt->flt_erpt_class = error_class;
4139*0Sstevel@tonic-gate 	errorq_dispatch(eqp, payload, payload_sz, flag);
4140*0Sstevel@tonic-gate }
4141*0Sstevel@tonic-gate 
4142*0Sstevel@tonic-gate #define	MAX_SIMM	8
4143*0Sstevel@tonic-gate 
4144*0Sstevel@tonic-gate struct ce_info {
4145*0Sstevel@tonic-gate 	char    name[UNUM_NAMLEN];
4146*0Sstevel@tonic-gate 	uint64_t intermittent_total;
4147*0Sstevel@tonic-gate 	uint64_t persistent_total;
4148*0Sstevel@tonic-gate 	uint64_t sticky_total;
4149*0Sstevel@tonic-gate 	unsigned short leaky_bucket_cnt;
4150*0Sstevel@tonic-gate };
4151*0Sstevel@tonic-gate 
4152*0Sstevel@tonic-gate /*
4153*0Sstevel@tonic-gate  * Separately-defined structure for use in reporting the ce_info
4154*0Sstevel@tonic-gate  * to SunVTS without exposing the internal layout and implementation
4155*0Sstevel@tonic-gate  * of struct ce_info.
4156*0Sstevel@tonic-gate  */
4157*0Sstevel@tonic-gate static struct ecc_error_info ecc_error_info_data = {
4158*0Sstevel@tonic-gate 	{ "version", KSTAT_DATA_UINT32 },
4159*0Sstevel@tonic-gate 	{ "maxcount", KSTAT_DATA_UINT32 },
4160*0Sstevel@tonic-gate 	{ "count", KSTAT_DATA_UINT32 }
4161*0Sstevel@tonic-gate };
4162*0Sstevel@tonic-gate static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4163*0Sstevel@tonic-gate     sizeof (struct kstat_named);
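
/*
 * This kstat is created in cpu_mp_init() as unix:0:ecc-info and can be
 * read from userland with kstat(1M), e.g. "kstat -m unix -n ecc-info"
 * (illustrative).
 */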
4164*0Sstevel@tonic-gate 
4165*0Sstevel@tonic-gate #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4166*0Sstevel@tonic-gate #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4167*0Sstevel@tonic-gate #endif
4168*0Sstevel@tonic-gate 
4169*0Sstevel@tonic-gate struct ce_info  *mem_ce_simm = NULL;
4170*0Sstevel@tonic-gate size_t mem_ce_simm_size = 0;
4171*0Sstevel@tonic-gate 
4172*0Sstevel@tonic-gate /*
4173*0Sstevel@tonic-gate  * Default values for the number of CE's allowed per interval.
4174*0Sstevel@tonic-gate  * The interval is defined in minutes;
4175*0Sstevel@tonic-gate  * SOFTERR_MIN_TIMEOUT is defined in microseconds.
4176*0Sstevel@tonic-gate  */
4177*0Sstevel@tonic-gate #define	SOFTERR_LIMIT_DEFAULT		2
4178*0Sstevel@tonic-gate #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4179*0Sstevel@tonic-gate #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4180*0Sstevel@tonic-gate #define	TIMEOUT_NONE			((timeout_id_t)0)
4181*0Sstevel@tonic-gate #define	TIMEOUT_SET			((timeout_id_t)1)
4182*0Sstevel@tonic-gate 
4183*0Sstevel@tonic-gate /*
4184*0Sstevel@tonic-gate  * timeout identifier for leaky_bucket
4185*0Sstevel@tonic-gate  */
4186*0Sstevel@tonic-gate static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4187*0Sstevel@tonic-gate 
4188*0Sstevel@tonic-gate /*
4189*0Sstevel@tonic-gate  * Tunables for maximum number of allowed CE's in a given time
4190*0Sstevel@tonic-gate  */
4191*0Sstevel@tonic-gate int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4192*0Sstevel@tonic-gate int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

void
cpu_mp_init(void)
{
	size_t size;
	size_t i;
	kstat_t *ksp;

	/*
	 * Initialize the CE error handling buffers.
	 */
	mem_ce_simm_size = MAX_SIMM * max_ncpus;
	size = sizeof (struct ce_info) * mem_ce_simm_size;
	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);

	ksp = kstat_create("unix", 0, "ecc-info", "misc",
	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
		ecc_error_info_data.count.value.ui32 = 0;
		kstat_install(ksp);
	}

	for (i = 0; i < mem_ce_simm_size; i++) {
		struct kstat_ecc_mm_info *kceip;

		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
		    KM_SLEEP);
		ksp = kstat_create("mm", i, "ecc-info", "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);
		if (ksp != NULL) {
			/*
			 * Re-declare ks_data_size to include room for the
			 * UNUM name, since we don't have KSTAT_FLAG_VAR_SIZE
			 * set.
			 */
			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
			    KSTAT_CE_UNUM_NAMLEN;
			ksp->ks_data = kceip;
			kstat_named_init(&kceip->name,
			    "name", KSTAT_DATA_STRING);
			kstat_named_init(&kceip->intermittent_total,
			    "intermittent_total", KSTAT_DATA_UINT64);
			kstat_named_init(&kceip->persistent_total,
			    "persistent_total", KSTAT_DATA_UINT64);
			kstat_named_init(&kceip->sticky_total,
			    "sticky_total", KSTAT_DATA_UINT64);
			/*
			 * Use the default snapshot routine, as it knows how
			 * to deal with named kstats that contain long strings.
			 */
			ksp->ks_update = ecc_kstat_update;
			kstat_install(ksp);
		} else {
			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
		}
	}
}
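
/*
 * Illustrative sketch (userland code, never compiled into this module):
 * the per-memory-module kstats created above can be read with libkstat.
 * This is a minimal sketch, assuming only the "mm:<instance>:ecc-info"
 * names published by cpu_mp_init() above; it is not part of the kernel.
 */
#if 0
#include <kstat.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	kstat_ctl_t *kc = kstat_open();
	kstat_t *ksp;

	if (kc == NULL)
		return (1);

	/* Walk the kstat chain looking for the mm:*:ecc-info kstats. */
	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
		kstat_named_t *kn;

		if (strcmp(ksp->ks_module, "mm") != 0 ||
		    strcmp(ksp->ks_name, "ecc-info") != 0)
			continue;
		if (kstat_read(kc, ksp, NULL) == -1)
			continue;
		kn = kstat_data_lookup(ksp, "sticky_total");
		if (kn != NULL)
			(void) printf("mm %d sticky_total %llu\n",
			    ksp->ks_instance, (u_longlong_t)kn->value.ui64);
	}
	(void) kstat_close(kc);
	return (0);
}
#endif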

/*ARGSUSED*/
static void
leaky_bucket_timeout(void *arg)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].leaky_bucket_cnt > 0)
			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
	}
	add_leaky_bucket_timeout();
}

static void
add_leaky_bucket_timeout(void)
{
	long timeout_in_microsecs;

	/*
	 * Create the timeout for the next leak.
	 *
	 * The timeout interval is calculated as follows:
	 *
	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
	 *
	 * ecc_softerr_interval is in minutes, so multiply it by 60 (seconds
	 * in a minute), then by MICROSEC to get the interval in microseconds.
	 * Divide this total by ecc_softerr_limit so that ecc_softerr_limit
	 * counts leak from each bucket per interval; multiplying before
	 * dividing keeps the result accurate to within a few microseconds.
	 */

	if (ecc_softerr_limit <= 0)
		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
	if (ecc_softerr_interval <= 0)
		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
	    ecc_softerr_limit;

	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;

	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
}
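
/*
 * Worked example (values chosen purely for illustration, not necessarily
 * the defaults): with ecc_softerr_limit = 2 and ecc_softerr_interval =
 * 1440 minutes (24 hours), the leak interval is
 *
 *	(1440 * 60 * 1000000) / 2 = 43,200,000,000 microseconds
 *
 * i.e. each bucket leaks one count every 12 hours, so a DIMM must
 * accumulate persistent CEs faster than that leak rate before its
 * bucket can exceed the limit.
 */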

/*
 * Legacy Correctable ECC Error Hash
 *
 * All of the code below this comment implements a legacy array which
 * counted intermittent, persistent, and sticky CE errors by unum, and
 * which was later extended to publish the data as a kstat for SunVTS.
 * All of this code has been replaced by FMA, and remains here only until
 * the UltraSPARC-I/II CPU code is converted to FMA or is EOLed.
 *
 * Errors are saved in three buckets per unum:
 * (1) sticky - the scrub was unsuccessful, so the error cannot be
 *     scrubbed.  This could represent a real problem, and is immediately
 *     printed out.
 * (2) persistent - the error was successfully scrubbed.  These errors
 *     use the leaky bucket algorithm to determine if there is a serious
 *     problem.
 * (3) intermittent - the error may have originated from the cpu or the
 *     upa/safari bus, and does not necessarily indicate any problem with
 *     the dimm itself, but it is critical information for debugging new
 *     hardware.  Because we do not know whether it came from the dimm,
 *     it would be inappropriate to include these in the leaky bucket
 *     counts.
 *
 * If the E$ line was modified before the scrub operation began, then the
 * displacement flush at the beginning of scrubphys() will cause the modified
 * line to be written out, which will clean up the CE.  Then, any subsequent
 * read will not cause an error, which will cause persistent errors to be
 * identified as intermittent.
 *
 * If a DIMM is going bad, it will produce true persistents as well as
 * false intermittents, so these intermittents can be safely ignored.
 *
 * If the error count is excessive for a DIMM, this function will return
 * PAGE_IS_FAILING, and the CPU module may then decide to remove that page
 * from use.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PAGE_IS_OK;

	ASSERT(psimm != NULL);

	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries; add a new one.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - because the scrub operation
				 * cannot find the error in the DIMM, we do
				 * not count these in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module; adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PAGE_IS_FAILING;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}
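
/*
 * Illustrative sketch (hypothetical helper, never compiled): how a
 * flt_status word maps onto the three buckets described above, using
 * the same flag precedence that ce_count_unum() applies.
 */
#if 0
static const char *
ce_bucket_name(int status)
{
	/* Sticky takes precedence, then persistent, then intermittent. */
	if (status & ECC_STICKY)
		return ("sticky");
	if (status & ECC_PERSISTENT)
		return ("persistent");
	if (status & ECC_INTERMITTENT)
		return ("intermittent");
	return ("none");
}
#endif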

/*
 * Function to support counting of I/O-detected CEs.
 */
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
	    automatic_page_removal) {
		page_t *pp = page_numtopp_nolock((pfn_t)
		    (ecc->flt_addr >> MMU_PAGESHIFT));

		if (pp) {
			page_settoxic(pp, PAGE_IS_FAULTY);
			(void) page_retire(pp, PAGE_IS_FAILING);
		}
	}
}

static int
ecc_kstat_update(kstat_t *ksp, int rw)
{
	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
	struct ce_info *ceip = mem_ce_simm;
	int i = ksp->ks_instance;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ASSERT(ksp->ks_data != NULL);
	ASSERT(i < mem_ce_simm_size && i >= 0);

	/*
	 * Since we're not using locks, make sure that we don't get partial
	 * data.  The name is always copied before the counters are
	 * incremented, so we only run this update routine if at least one
	 * of the counters is non-zero; this guarantees that ce_count_unum()
	 * has finished and the string is fully copied.
	 */
	if (ceip[i].intermittent_total == 0 &&
	    ceip[i].persistent_total == 0 &&
	    ceip[i].sticky_total == 0) {
		/*
		 * Uninitialized or partially initialized.  Ignore.
		 * The ks_data buffer was allocated via kmem_zalloc,
		 * so there is no need to bzero it.
		 */
		return (0);
	}

	kstat_named_setstr(&kceip->name, ceip[i].name);
	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;

	return (0);
}

#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}

/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}

static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
	(1 << TTE32M) | (1 << TTE256M));
static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));

/*
 * This function returns the US_II mmu-specific values for the hat's
 * disable_large_pages and disable_ism_large_pages variables.
 */
int
mmu_large_pages_disabled(uint_t flag)
{
	int pages_disable = 0;

	if (flag == HAT_LOAD) {
		pages_disable = mmu_disable_large_pages;
	} else if (flag == HAT_LOAD_SHARE) {
		pages_disable = mmu_disable_ism_large_pages;
	}
	return (pages_disable);
}
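
/*
 * Illustrative sketch (hypothetical helper, never compiled): a caller
 * could use the bitmask returned above to test whether a particular TTE
 * page size is permitted for a given mapping type.  The mask carries one
 * bit per TTE size, as in the initializers above.
 */
#if 0
static int
us2_pagesize_allowed(uint_t ttesz, uint_t flag)
{
	/* The size is allowed if its bit is clear in the disable mask. */
	return ((mmu_large_pages_disabled(flag) & (1 << ttesz)) == 0);
}
#endif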

/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}

size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
	uint_t tte;

	if (lpsize == 0) {
		/* no setting for segkmem_lpsize in /etc/system: use default */
		return (MMU_PAGESIZE4M);
	}

	for (tte = TTE8K; tte <= TTE4M; tte++) {
		if (lpsize == TTEBYTES(tte))
			return (lpsize);
	}

	return (TTEBYTES(TTE8K));
}
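
/*
 * Worked example (values for illustration only): a segkmem_lpsize of
 * 0x400000 (4M) matches TTEBYTES(TTE4M) and is returned unchanged,
 * while an unsupported value such as 0x2000000 (32M) falls through the
 * loop and is clamped to TTEBYTES(TTE8K), the 8K base page size.
 */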