10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/systm.h>
310Sstevel@tonic-gate #include <sys/archsystm.h>
320Sstevel@tonic-gate #include <sys/machparam.h>
330Sstevel@tonic-gate #include <sys/machsystm.h>
340Sstevel@tonic-gate #include <sys/cpu.h>
350Sstevel@tonic-gate #include <sys/elf_SPARC.h>
360Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
370Sstevel@tonic-gate #include <vm/page.h>
380Sstevel@tonic-gate #include <sys/cpuvar.h>
390Sstevel@tonic-gate #include <sys/spitregs.h>
400Sstevel@tonic-gate #include <sys/async.h>
410Sstevel@tonic-gate #include <sys/cmn_err.h>
420Sstevel@tonic-gate #include <sys/debug.h>
430Sstevel@tonic-gate #include <sys/dditypes.h>
440Sstevel@tonic-gate #include <sys/sunddi.h>
450Sstevel@tonic-gate #include <sys/cpu_module.h>
460Sstevel@tonic-gate #include <sys/prom_debug.h>
470Sstevel@tonic-gate #include <sys/vmsystm.h>
480Sstevel@tonic-gate #include <sys/prom_plat.h>
490Sstevel@tonic-gate #include <sys/sysmacros.h>
500Sstevel@tonic-gate #include <sys/intreg.h>
510Sstevel@tonic-gate #include <sys/machtrap.h>
520Sstevel@tonic-gate #include <sys/ontrap.h>
530Sstevel@tonic-gate #include <sys/ivintr.h>
540Sstevel@tonic-gate #include <sys/atomic.h>
550Sstevel@tonic-gate #include <sys/panic.h>
560Sstevel@tonic-gate #include <sys/ndifm.h>
570Sstevel@tonic-gate #include <sys/fm/protocol.h>
580Sstevel@tonic-gate #include <sys/fm/util.h>
590Sstevel@tonic-gate #include <sys/fm/cpu/UltraSPARC-II.h>
600Sstevel@tonic-gate #include <sys/ddi.h>
610Sstevel@tonic-gate #include <sys/ecc_kstat.h>
620Sstevel@tonic-gate #include <sys/watchpoint.h>
630Sstevel@tonic-gate #include <sys/dtrace.h>
640Sstevel@tonic-gate #include <sys/errclassify.h>
650Sstevel@tonic-gate 
uchar_t	*ctx_pgsz_array = NULL;	/* NOTE(review): per-context page-size codes; NULL here, presumably populated by the HAT layer — confirm */
670Sstevel@tonic-gate 
/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* 8 bytes of E$ line data */
	uint64_t ec_afsr;	/* AFSR state associated with this data word */
} ec_data_t;
760Sstevel@tonic-gate 
/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* fault type - one of the CPU_*_ERR codes below */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state (one full E$ line) */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;
890Sstevel@tonic-gate 
900Sstevel@tonic-gate /*
910Sstevel@tonic-gate  * Prototypes for support routines in spitfire_asm.s:
920Sstevel@tonic-gate  */
930Sstevel@tonic-gate extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
940Sstevel@tonic-gate extern uint64_t get_lsu(void);
950Sstevel@tonic-gate extern void set_lsu(uint64_t ncc);
960Sstevel@tonic-gate extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
970Sstevel@tonic-gate 				uint64_t *oafsr, uint64_t *acc_afsr);
980Sstevel@tonic-gate extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
990Sstevel@tonic-gate extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
1000Sstevel@tonic-gate 				uint64_t *acc_afsr);
1010Sstevel@tonic-gate extern uint64_t read_and_clear_afsr();
1020Sstevel@tonic-gate extern void write_ec_tag_parity(uint32_t id);
1030Sstevel@tonic-gate extern void write_hb_ec_tag_parity(uint32_t id);
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate /*
1060Sstevel@tonic-gate  * Spitfire module routines:
1070Sstevel@tonic-gate  */
1080Sstevel@tonic-gate static void cpu_async_log_err(void *flt);
1090Sstevel@tonic-gate /*PRINTFLIKE6*/
1100Sstevel@tonic-gate static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
1110Sstevel@tonic-gate     uint_t logflags, const char *endstr, const char *fmt, ...);
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
1140Sstevel@tonic-gate static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
1150Sstevel@tonic-gate static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
1160Sstevel@tonic-gate 
1170Sstevel@tonic-gate static void log_ce_err(struct async_flt *aflt, char *unum);
1180Sstevel@tonic-gate static void log_ue_err(struct async_flt *aflt, char *unum);
1190Sstevel@tonic-gate static void check_misc_err(spitf_async_flt *spf_flt);
1200Sstevel@tonic-gate static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
1210Sstevel@tonic-gate static int check_ecc(struct async_flt *aflt);
1220Sstevel@tonic-gate static uint_t get_cpu_status(uint64_t arg);
1230Sstevel@tonic-gate static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
1240Sstevel@tonic-gate static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
1250Sstevel@tonic-gate 		int *m, uint64_t *afsr);
1260Sstevel@tonic-gate static void ecache_kstat_init(struct cpu *cp);
1270Sstevel@tonic-gate static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
1280Sstevel@tonic-gate 		uint64_t paddr, int mpb, uint64_t);
1290Sstevel@tonic-gate static uint64_t ecache_scrub_misc_err(int, uint64_t);
1300Sstevel@tonic-gate static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
1310Sstevel@tonic-gate static void ecache_page_retire(void *);
1320Sstevel@tonic-gate static int ecc_kstat_update(kstat_t *ksp, int rw);
1330Sstevel@tonic-gate static int ce_count_unum(int status, int len, char *unum);
1340Sstevel@tonic-gate static void add_leaky_bucket_timeout(void);
1350Sstevel@tonic-gate static int synd_to_synd_code(int synd_status, ushort_t synd);
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate extern uint_t read_all_memscrub;
1380Sstevel@tonic-gate extern void memscrub_run(void);
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate static uchar_t	isus2i;			/* set if sabre */
1410Sstevel@tonic-gate static uchar_t	isus2e;			/* set if hummingbird */
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate /*
1440Sstevel@tonic-gate  * Default ecache mask and shift settings for Spitfire.  If we detect a
1450Sstevel@tonic-gate  * different CPU implementation, we will modify these values at boot time.
1460Sstevel@tonic-gate  */
1470Sstevel@tonic-gate static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
1480Sstevel@tonic-gate static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
1490Sstevel@tonic-gate static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
1500Sstevel@tonic-gate static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
1510Sstevel@tonic-gate static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
1520Sstevel@tonic-gate static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
1530Sstevel@tonic-gate static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
1540Sstevel@tonic-gate static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
1550Sstevel@tonic-gate static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
1560Sstevel@tonic-gate static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate /*
1590Sstevel@tonic-gate  * Default ecache state bits for Spitfire.  These individual bits indicate if
1600Sstevel@tonic-gate  * the given line is in any of the valid or modified states, respectively.
1610Sstevel@tonic-gate  * Again, we modify these at boot if we detect a different CPU.
1620Sstevel@tonic-gate  */
1630Sstevel@tonic-gate static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
1640Sstevel@tonic-gate static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
1650Sstevel@tonic-gate static uchar_t cpu_ec_parity		= S_EC_PARITY;
1660Sstevel@tonic-gate static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate /*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
1720Sstevel@tonic-gate  *
1730Sstevel@tonic-gate  *      00-63   The number of the bad bit, when only one bit is bad.
1740Sstevel@tonic-gate  *      64      ECC bit C0 is bad.
1750Sstevel@tonic-gate  *      65      ECC bit C1 is bad.
1760Sstevel@tonic-gate  *      66      ECC bit C2 is bad.
1770Sstevel@tonic-gate  *      67      ECC bit C3 is bad.
1780Sstevel@tonic-gate  *      68      ECC bit C4 is bad.
1790Sstevel@tonic-gate  *      69      ECC bit C5 is bad.
1800Sstevel@tonic-gate  *      70      ECC bit C6 is bad.
1810Sstevel@tonic-gate  *      71      ECC bit C7 is bad.
1820Sstevel@tonic-gate  *      72      Two bits are bad.
1830Sstevel@tonic-gate  *      73      Three bits are bad.
1840Sstevel@tonic-gate  *      74      Four bits are bad.
1850Sstevel@tonic-gate  *      75      More than Four bits are bad.
1860Sstevel@tonic-gate  *      76      NO bits are bad.
1870Sstevel@tonic-gate  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
1880Sstevel@tonic-gate  */
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate #define	C0	64
1910Sstevel@tonic-gate #define	C1	65
1920Sstevel@tonic-gate #define	C2	66
1930Sstevel@tonic-gate #define	C3	67
1940Sstevel@tonic-gate #define	C4	68
1950Sstevel@tonic-gate #define	C5	69
1960Sstevel@tonic-gate #define	C6	70
1970Sstevel@tonic-gate #define	C7	71
1980Sstevel@tonic-gate #define	M2	72
1990Sstevel@tonic-gate #define	M3	73
2000Sstevel@tonic-gate #define	M4	74
2010Sstevel@tonic-gate #define	MX	75
2020Sstevel@tonic-gate #define	NA	76
2030Sstevel@tonic-gate 
/*
 * Classify a syndrome code from ecc_syndrome_tab: a single-bit data
 * error names a bad data bit (0-63); a single-bit check error names a
 * bad ECC check bit (C0-C7).  The macro parameter is parenthesized so
 * the tests bind correctly for any argument expression (CERT PRE01-C).
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
2080Sstevel@tonic-gate 
/*
 * Syndrome decode table, indexed by an 8-bit UDB syndrome.  Entries
 * 0-63 name the single bad data bit; C0-C7 name a bad check bit; and
 * M2/M3/M4/MX/NA flag multi-bit or no-bit cases, per the legend in the
 * block comment above.
 */
static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate #define	SYND_TBL_SIZE 256
2300Sstevel@tonic-gate 
/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 * Bit 15 of the recorded syndrome flags which UDB the error came from;
 * the low 15 bits hold the syndrome itself.  Macro parameters are
 * parenthesized so shifts/masks bind correctly for any argument
 * expression (CERT PRE01-C).
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate /*
2400Sstevel@tonic-gate  * These error types are specific to Spitfire and are used internally for the
2410Sstevel@tonic-gate  * spitfire fault structure flt_type field.
2420Sstevel@tonic-gate  */
2430Sstevel@tonic-gate #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
2440Sstevel@tonic-gate #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
2450Sstevel@tonic-gate #define	CPU_WP_ERR		2	/* WP parity error */
2460Sstevel@tonic-gate #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
2470Sstevel@tonic-gate #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
2480Sstevel@tonic-gate #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
2490Sstevel@tonic-gate #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
2500Sstevel@tonic-gate #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
2510Sstevel@tonic-gate #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
2520Sstevel@tonic-gate #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
2530Sstevel@tonic-gate #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
2540Sstevel@tonic-gate #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
2550Sstevel@tonic-gate #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
2560Sstevel@tonic-gate #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
2570Sstevel@tonic-gate #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
2580Sstevel@tonic-gate #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate /*
2610Sstevel@tonic-gate  * Macro to access the "Spitfire cpu private" data structure.
2620Sstevel@tonic-gate  */
2630Sstevel@tonic-gate #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate /*
2660Sstevel@tonic-gate  * set to 0 to disable automatic retiring of pages on
2670Sstevel@tonic-gate  * DIMMs that have excessive soft errors
2680Sstevel@tonic-gate  */
2690Sstevel@tonic-gate int automatic_page_removal = 1;
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate /*
2720Sstevel@tonic-gate  * Heuristic for figuring out which module to replace.
2730Sstevel@tonic-gate  * Relative likelihood that this P_SYND indicates that this module is bad.
2740Sstevel@tonic-gate  * We call it a "score", though, not a relative likelihood.
2750Sstevel@tonic-gate  *
2760Sstevel@tonic-gate  * Step 1.
2770Sstevel@tonic-gate  * Assign a score to each byte of P_SYND according to the following rules:
2780Sstevel@tonic-gate  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
2790Sstevel@tonic-gate  * If one bit on, give it a 95.
2800Sstevel@tonic-gate  * If seven bits on, give it a 10.
2810Sstevel@tonic-gate  * If two bits on:
2820Sstevel@tonic-gate  *   in different nybbles, a 90
2830Sstevel@tonic-gate  *   in same nybble, but unaligned, 85
2840Sstevel@tonic-gate  *   in same nybble and as an aligned pair, 80
2850Sstevel@tonic-gate  * If six bits on, look at the bits that are off:
2860Sstevel@tonic-gate  *   in same nybble and as an aligned pair, 15
2870Sstevel@tonic-gate  *   in same nybble, but unaligned, 20
2880Sstevel@tonic-gate  *   in different nybbles, a 25
 * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
 *   in the same nybble, 65
 * If five bits on, look at the bits that are off:
 *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
2970Sstevel@tonic-gate  * If four bits on:
2980Sstevel@tonic-gate  *   all in one nybble, 45
2990Sstevel@tonic-gate  *   as two aligned pairs, 50
3000Sstevel@tonic-gate  *   one aligned pair, 55
3010Sstevel@tonic-gate  *   no aligned pairs, 60
3020Sstevel@tonic-gate  *
3030Sstevel@tonic-gate  * Step 2:
3040Sstevel@tonic-gate  * Take the higher of the two scores (one for each byte) as the score
3050Sstevel@tonic-gate  * for the module.
3060Sstevel@tonic-gate  *
3070Sstevel@tonic-gate  * Print the score for each module, and field service should replace the
3080Sstevel@tonic-gate  * module with the highest score.
3090Sstevel@tonic-gate  */
3100Sstevel@tonic-gate 
/*
 * Per-byte P_SYND score table implementing the heuristic described in
 * the block comment above (higher score => this module more likely bad).
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate int
3400Sstevel@tonic-gate ecc_psynd_score(ushort_t p_synd)
3410Sstevel@tonic-gate {
3420Sstevel@tonic-gate 	int i, j, a, b;
3430Sstevel@tonic-gate 
3440Sstevel@tonic-gate 	i = p_synd & 0xFF;
3450Sstevel@tonic-gate 	j = (p_synd >> 8) & 0xFF;
3460Sstevel@tonic-gate 
3470Sstevel@tonic-gate 	a = p_synd_score_table[i];
3480Sstevel@tonic-gate 	b = p_synd_score_table[j];
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	return (a > b ? a : b);
3510Sstevel@tonic-gate }
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate /*
3540Sstevel@tonic-gate  * Async Fault Logging
3550Sstevel@tonic-gate  *
3560Sstevel@tonic-gate  * To ease identifying, reading, and filtering async fault log messages, the
3570Sstevel@tonic-gate  * label [AFT#] is now prepended to each async fault message.  These messages
3580Sstevel@tonic-gate  * and the logging rules are implemented by cpu_aflt_log(), below.
3590Sstevel@tonic-gate  *
3600Sstevel@tonic-gate  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
3610Sstevel@tonic-gate  *          This includes both corrected ECC memory and ecache faults.
3620Sstevel@tonic-gate  *
3630Sstevel@tonic-gate  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
3640Sstevel@tonic-gate  *          else except CE errors) with a priority of 1 (highest).  This tag
3650Sstevel@tonic-gate  *          is also used for panic messages that result from an async fault.
3660Sstevel@tonic-gate  *
3670Sstevel@tonic-gate  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
3680Sstevel@tonic-gate  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
3690Sstevel@tonic-gate  *          of the E-$ data and tags.
3700Sstevel@tonic-gate  *
3710Sstevel@tonic-gate  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
3720Sstevel@tonic-gate  * printed on the console.  To send all AFT logs to both the log and the
3730Sstevel@tonic-gate  * console, set aft_verbose = 1.
3740Sstevel@tonic-gate  */
3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
3770Sstevel@tonic-gate #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
3780Sstevel@tonic-gate #define	CPU_ERRID		0x0004	/* print flt_id */
3790Sstevel@tonic-gate #define	CPU_TL			0x0008	/* print flt_tl */
3800Sstevel@tonic-gate #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
3810Sstevel@tonic-gate #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
3820Sstevel@tonic-gate #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
3830Sstevel@tonic-gate #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
3840Sstevel@tonic-gate #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
3850Sstevel@tonic-gate #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
3860Sstevel@tonic-gate #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
3870Sstevel@tonic-gate #define	CPU_FAULTPC		0x0800	/* print flt_pc */
3880Sstevel@tonic-gate #define	CPU_SYND		0x1000	/* print flt_synd and unum */
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
3910Sstevel@tonic-gate 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
3920Sstevel@tonic-gate 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
3930Sstevel@tonic-gate 				CPU_FAULTPC)
3940Sstevel@tonic-gate #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
3950Sstevel@tonic-gate #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
3960Sstevel@tonic-gate 				~CPU_SPACE)
3970Sstevel@tonic-gate #define	PARERR_LFLAGS	(CMN_LFLAGS)
3980Sstevel@tonic-gate #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
3990Sstevel@tonic-gate #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
4000Sstevel@tonic-gate 				~CPU_FLTCPU & ~CPU_FAULTPC)
4010Sstevel@tonic-gate #define	BERRTO_LFLAGS	(CMN_LFLAGS)
4020Sstevel@tonic-gate #define	NO_LFLAGS	(0)
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate #define	AFSR_FMTSTR0	"\020\1ME"
4050Sstevel@tonic-gate #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
4060Sstevel@tonic-gate 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
4070Sstevel@tonic-gate #define	UDB_FMTSTR	"\020\012UE\011CE"
4080Sstevel@tonic-gate 
4090Sstevel@tonic-gate /*
4100Sstevel@tonic-gate  * Maximum number of contexts for Spitfire.
4110Sstevel@tonic-gate  */
4120Sstevel@tonic-gate #define	MAX_NCTXS	(1 << 13)
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate /*
4150Sstevel@tonic-gate  * Save the cache bootup state for use when internal
4160Sstevel@tonic-gate  * caches are to be re-enabled after an error occurs.
4170Sstevel@tonic-gate  */
4180Sstevel@tonic-gate uint64_t	cache_boot_state = 0;
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate /*
4210Sstevel@tonic-gate  * PA[31:0] represent Displacement in UPA configuration space.
4220Sstevel@tonic-gate  */
4230Sstevel@tonic-gate uint_t	root_phys_addr_lo_mask = 0xffffffff;
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate /*
4260Sstevel@tonic-gate  * Spitfire legacy globals
4270Sstevel@tonic-gate  */
4280Sstevel@tonic-gate int	itlb_entries;
4290Sstevel@tonic-gate int	dtlb_entries;
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate void
4320Sstevel@tonic-gate cpu_setup(void)
4330Sstevel@tonic-gate {
4340Sstevel@tonic-gate 	extern int page_retire_messages;
4350Sstevel@tonic-gate 	extern int at_flags;
4360Sstevel@tonic-gate #if defined(SF_ERRATA_57)
4370Sstevel@tonic-gate 	extern caddr_t errata57_limit;
4380Sstevel@tonic-gate #endif
4390Sstevel@tonic-gate 	extern int disable_text_largepages;
4400Sstevel@tonic-gate 	extern int disable_initdata_largepages;
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 	/*
4470Sstevel@tonic-gate 	 * Spitfire isn't currently FMA-aware, so we have to enable the
4480Sstevel@tonic-gate 	 * page retirement messages.
4490Sstevel@tonic-gate 	 */
4500Sstevel@tonic-gate 	page_retire_messages = 1;
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	/*
4530Sstevel@tonic-gate 	 * save the cache bootup state.
4540Sstevel@tonic-gate 	 */
4550Sstevel@tonic-gate 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	/*
4580Sstevel@tonic-gate 	 * Use the maximum number of contexts available for Spitfire unless
4590Sstevel@tonic-gate 	 * it has been tuned for debugging.
4600Sstevel@tonic-gate 	 * We are checking against 0 here since this value can be patched
4610Sstevel@tonic-gate 	 * while booting.  It can not be patched via /etc/system since it
4620Sstevel@tonic-gate 	 * will be patched too late and thus cause the system to panic.
4630Sstevel@tonic-gate 	 */
4640Sstevel@tonic-gate 	if (nctxs == 0)
4650Sstevel@tonic-gate 		nctxs = MAX_NCTXS;
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 	if (use_page_coloring) {
4680Sstevel@tonic-gate 		do_pg_coloring = 1;
4690Sstevel@tonic-gate 		if (use_virtual_coloring)
4700Sstevel@tonic-gate 			do_virtual_coloring = 1;
4710Sstevel@tonic-gate 	}
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 	/*
4740Sstevel@tonic-gate 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
4750Sstevel@tonic-gate 	 */
4760Sstevel@tonic-gate 	pp_slots = MIN(8, MAXPP_SLOTS);
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 	/*
4790Sstevel@tonic-gate 	 * Block stores invalidate all pages of the d$ so pagecopy
4800Sstevel@tonic-gate 	 * et. al. do not need virtual translations with virtual
4810Sstevel@tonic-gate 	 * coloring taken into consideration.
4820Sstevel@tonic-gate 	 */
4830Sstevel@tonic-gate 	pp_consistent_coloring = 0;
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate 	isa_list =
4860Sstevel@tonic-gate 	    "sparcv9+vis sparcv9 "
4870Sstevel@tonic-gate 	    "sparcv8plus+vis sparcv8plus "
4880Sstevel@tonic-gate 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 	cpu_hwcap_flags = AV_SPARC_VIS;
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate 	/*
4930Sstevel@tonic-gate 	 * On Spitfire, there's a hole in the address space
4940Sstevel@tonic-gate 	 * that we must never map (the hardware only support 44-bits of
4950Sstevel@tonic-gate 	 * virtual address).  Later CPUs are expected to have wider
4960Sstevel@tonic-gate 	 * supported address ranges.
4970Sstevel@tonic-gate 	 *
4980Sstevel@tonic-gate 	 * See address map on p23 of the UltraSPARC 1 user's manual.
4990Sstevel@tonic-gate 	 */
5000Sstevel@tonic-gate 	hole_start = (caddr_t)0x80000000000ull;
5010Sstevel@tonic-gate 	hole_end = (caddr_t)0xfffff80000000000ull;
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 	/*
5040Sstevel@tonic-gate 	 * A spitfire call bug requires us to be a further 4Gbytes of
5050Sstevel@tonic-gate 	 * firewall from the spec.
5060Sstevel@tonic-gate 	 *
5070Sstevel@tonic-gate 	 * See Spitfire Errata #21
5080Sstevel@tonic-gate 	 */
5090Sstevel@tonic-gate 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
5100Sstevel@tonic-gate 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
5110Sstevel@tonic-gate 
5120Sstevel@tonic-gate 	/*
5130Sstevel@tonic-gate 	 * The kpm mapping window.
5140Sstevel@tonic-gate 	 * kpm_size:
5150Sstevel@tonic-gate 	 *	The size of a single kpm range.
5160Sstevel@tonic-gate 	 *	The overall size will be: kpm_size * vac_colors.
5170Sstevel@tonic-gate 	 * kpm_vbase:
5180Sstevel@tonic-gate 	 *	The virtual start address of the kpm range within the kernel
5190Sstevel@tonic-gate 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
5200Sstevel@tonic-gate 	 */
5210Sstevel@tonic-gate 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
5220Sstevel@tonic-gate 	kpm_size_shift = 41;
5230Sstevel@tonic-gate 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate #if defined(SF_ERRATA_57)
5260Sstevel@tonic-gate 	errata57_limit = (caddr_t)0x80000000ul;
5270Sstevel@tonic-gate #endif
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	/*
5300Sstevel@tonic-gate 	 * Allow only 8K, 64K and 4M pages for text by default.
5310Sstevel@tonic-gate 	 * Allow only 8K and 64K page for initialized data segments by
5320Sstevel@tonic-gate 	 * default.
5330Sstevel@tonic-gate 	 */
5340Sstevel@tonic-gate 	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
5350Sstevel@tonic-gate 	    (1 << TTE256M);
5360Sstevel@tonic-gate 	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
5370Sstevel@tonic-gate 	    (1 << TTE32M) | (1 << TTE256M);
5380Sstevel@tonic-gate }
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate static int
541*789Sahrens getintprop(pnode_t node, char *name, int deflt)
5420Sstevel@tonic-gate {
5430Sstevel@tonic-gate 	int	value;
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate 	switch (prom_getproplen(node, name)) {
5460Sstevel@tonic-gate 	case 0:
5470Sstevel@tonic-gate 		value = 1;	/* boolean properties */
5480Sstevel@tonic-gate 		break;
5490Sstevel@tonic-gate 
5500Sstevel@tonic-gate 	case sizeof (int):
5510Sstevel@tonic-gate 		(void) prom_getprop(node, name, (caddr_t)&value);
5520Sstevel@tonic-gate 		break;
5530Sstevel@tonic-gate 
5540Sstevel@tonic-gate 	default:
5550Sstevel@tonic-gate 		value = deflt;
5560Sstevel@tonic-gate 		break;
5570Sstevel@tonic-gate 	}
5580Sstevel@tonic-gate 
5590Sstevel@tonic-gate 	return (value);
5600Sstevel@tonic-gate }
5610Sstevel@tonic-gate 
5620Sstevel@tonic-gate /*
5630Sstevel@tonic-gate  * Set the magic constants of the implementation.
5640Sstevel@tonic-gate  */
5650Sstevel@tonic-gate void
566*789Sahrens cpu_fiximp(pnode_t dnode)
5670Sstevel@tonic-gate {
5680Sstevel@tonic-gate 	extern int vac_size, vac_shift;
5690Sstevel@tonic-gate 	extern uint_t vac_mask;
5700Sstevel@tonic-gate 	extern int dcache_line_mask;
5710Sstevel@tonic-gate 	int i, a;
5720Sstevel@tonic-gate 	static struct {
5730Sstevel@tonic-gate 		char	*name;
5740Sstevel@tonic-gate 		int	*var;
5750Sstevel@tonic-gate 	} prop[] = {
5760Sstevel@tonic-gate 		"dcache-size",		&dcache_size,
5770Sstevel@tonic-gate 		"dcache-line-size",	&dcache_linesize,
5780Sstevel@tonic-gate 		"icache-size",		&icache_size,
5790Sstevel@tonic-gate 		"icache-line-size",	&icache_linesize,
5800Sstevel@tonic-gate 		"ecache-size",		&ecache_size,
5810Sstevel@tonic-gate 		"ecache-line-size",	&ecache_alignsize,
5820Sstevel@tonic-gate 		"ecache-associativity", &ecache_associativity,
5830Sstevel@tonic-gate 		"#itlb-entries",	&itlb_entries,
5840Sstevel@tonic-gate 		"#dtlb-entries",	&dtlb_entries,
5850Sstevel@tonic-gate 		};
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
5880Sstevel@tonic-gate 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
5890Sstevel@tonic-gate 			*prop[i].var = a;
5900Sstevel@tonic-gate 		}
5910Sstevel@tonic-gate 	}
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate 	ecache_setsize = ecache_size / ecache_associativity;
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate 	vac_size = S_VAC_SIZE;
5960Sstevel@tonic-gate 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
5970Sstevel@tonic-gate 	i = 0; a = vac_size;
5980Sstevel@tonic-gate 	while (a >>= 1)
5990Sstevel@tonic-gate 		++i;
6000Sstevel@tonic-gate 	vac_shift = i;
6010Sstevel@tonic-gate 	shm_alignment = vac_size;
6020Sstevel@tonic-gate 	vac = 1;
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate 	/*
6070Sstevel@tonic-gate 	 * UltraSPARC I & II have ecache sizes running
6080Sstevel@tonic-gate 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
6090Sstevel@tonic-gate 	 * and 8 MB. Adjust the copyin/copyout limits
6100Sstevel@tonic-gate 	 * according to the cache size. The magic number
6110Sstevel@tonic-gate 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
6120Sstevel@tonic-gate 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
6130Sstevel@tonic-gate 	 * VIS instructions.
6140Sstevel@tonic-gate 	 *
6150Sstevel@tonic-gate 	 * We assume that all CPUs on the system have the same size
6160Sstevel@tonic-gate 	 * ecache. We're also called very early in the game.
6170Sstevel@tonic-gate 	 * /etc/system will be parsed *after* we're called so
6180Sstevel@tonic-gate 	 * these values can be overwritten.
6190Sstevel@tonic-gate 	 */
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6220Sstevel@tonic-gate 	if (ecache_size <= 524288) {
6230Sstevel@tonic-gate 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
6240Sstevel@tonic-gate 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
6250Sstevel@tonic-gate 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
6260Sstevel@tonic-gate 	} else if (ecache_size == 1048576) {
6270Sstevel@tonic-gate 		hw_copy_limit_2 = 1024;
6280Sstevel@tonic-gate 		hw_copy_limit_4 = 1280;
6290Sstevel@tonic-gate 		hw_copy_limit_8 = 1536;
6300Sstevel@tonic-gate 	} else if (ecache_size == 2097152) {
6310Sstevel@tonic-gate 		hw_copy_limit_2 = 1536;
6320Sstevel@tonic-gate 		hw_copy_limit_4 = 2048;
6330Sstevel@tonic-gate 		hw_copy_limit_8 = 2560;
6340Sstevel@tonic-gate 	} else if (ecache_size == 4194304) {
6350Sstevel@tonic-gate 		hw_copy_limit_2 = 2048;
6360Sstevel@tonic-gate 		hw_copy_limit_4 = 2560;
6370Sstevel@tonic-gate 		hw_copy_limit_8 = 3072;
6380Sstevel@tonic-gate 	} else {
6390Sstevel@tonic-gate 		hw_copy_limit_2 = 2560;
6400Sstevel@tonic-gate 		hw_copy_limit_4 = 3072;
6410Sstevel@tonic-gate 		hw_copy_limit_8 = 3584;
6420Sstevel@tonic-gate 	}
6430Sstevel@tonic-gate }
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate /*
6460Sstevel@tonic-gate  * Called by setcpudelay
6470Sstevel@tonic-gate  */
6480Sstevel@tonic-gate void
6490Sstevel@tonic-gate cpu_init_tick_freq(void)
6500Sstevel@tonic-gate {
6510Sstevel@tonic-gate 	/*
6520Sstevel@tonic-gate 	 * Determine the cpu frequency by calling
6530Sstevel@tonic-gate 	 * tod_get_cpufrequency. Use an approximate freqency
6540Sstevel@tonic-gate 	 * value computed by the prom if the tod module
6550Sstevel@tonic-gate 	 * is not initialized and loaded yet.
6560Sstevel@tonic-gate 	 */
6570Sstevel@tonic-gate 	if (tod_ops.tod_get_cpufrequency != NULL) {
6580Sstevel@tonic-gate 		mutex_enter(&tod_lock);
6590Sstevel@tonic-gate 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
6600Sstevel@tonic-gate 		mutex_exit(&tod_lock);
6610Sstevel@tonic-gate 	} else {
6620Sstevel@tonic-gate #if defined(HUMMINGBIRD)
6630Sstevel@tonic-gate 		/*
6640Sstevel@tonic-gate 		 * the hummingbird version of %stick is used as the basis for
6650Sstevel@tonic-gate 		 * low level timing; this provides an independent constant-rate
6660Sstevel@tonic-gate 		 * clock for general system use, and frees power mgmt to set
6670Sstevel@tonic-gate 		 * various cpu clock speeds.
6680Sstevel@tonic-gate 		 */
6690Sstevel@tonic-gate 		if (system_clock_freq == 0)
6700Sstevel@tonic-gate 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
6710Sstevel@tonic-gate 			    system_clock_freq);
6720Sstevel@tonic-gate 		sys_tick_freq = system_clock_freq;
6730Sstevel@tonic-gate #else /* SPITFIRE */
6740Sstevel@tonic-gate 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
6750Sstevel@tonic-gate #endif
6760Sstevel@tonic-gate 	}
6770Sstevel@tonic-gate }
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate void shipit(int upaid);
6810Sstevel@tonic-gate extern uint64_t xc_tick_limit;
6820Sstevel@tonic-gate extern uint64_t xc_tick_jump_limit;
6830Sstevel@tonic-gate 
6840Sstevel@tonic-gate #ifdef SEND_MONDO_STATS
6850Sstevel@tonic-gate uint64_t x_early[NCPU][64];
6860Sstevel@tonic-gate #endif
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate /*
6890Sstevel@tonic-gate  * Note: A version of this function is used by the debugger via the KDI,
6900Sstevel@tonic-gate  * and must be kept in sync with this version.  Any changes made to this
6910Sstevel@tonic-gate  * function to support new chips or to accommodate errata must also be included
6920Sstevel@tonic-gate  * in the KDI-specific version.  See spitfire_kdi.c.
6930Sstevel@tonic-gate  */
/*
 * Deliver a mondo (cross-call) interrupt to the cpu identified by cpuid
 * and busy-wait until the interrupt dispatch status register (IDSR)
 * reports completion.  The wait is bounded by xc_tick_limit ticks; if
 * the dispatch neither completes nor is NACKed within the window we
 * panic, unless a panic is already in progress, in which case we give up.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		/* An IDSR of zero means the dispatch completed. */
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			upaid, nack, busy);
		}
		/* Previous dispatch still in flight; keep polling. */
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		/*
		 * Not busy and not done: the target NACKed the dispatch.
		 * Wait briefly and resend.
		 */
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate void
7430Sstevel@tonic-gate send_mondo_set(cpuset_t set)
7440Sstevel@tonic-gate {
7450Sstevel@tonic-gate 	int i;
7460Sstevel@tonic-gate 
7470Sstevel@tonic-gate 	for (i = 0; i < NCPU; i++)
7480Sstevel@tonic-gate 		if (CPU_IN_SET(set, i)) {
7490Sstevel@tonic-gate 			send_one_mondo(i);
7500Sstevel@tonic-gate 			CPUSET_DEL(set, i);
7510Sstevel@tonic-gate 			if (CPUSET_ISNULL(set))
7520Sstevel@tonic-gate 				break;
7530Sstevel@tonic-gate 		}
7540Sstevel@tonic-gate }
7550Sstevel@tonic-gate 
/*
 * syncfpu() is intentionally empty on this cpu module; the entry point
 * exists to satisfy the common interface.
 * NOTE(review): presumably other cpu modules provide a non-trivial
 * implementation -- confirm against the sun4u common code.
 */
void
syncfpu(void)
{
}
7600Sstevel@tonic-gate 
/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 *
 * Returns sizeof (spitf_async_flt).  As a side effect, this routine also
 * latches the implementation-specific ecache tag layout (Sabre vs.
 * Hummingbird, with Spitfire/Blackbird as the unmodified default) into the
 * cpu_ec_* state variables used for ecache tag manipulation.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate 
/*
 * Correctable ecc error trap handler
 *
 * Extracts the UDB (upper/lower data buffer) status and ECC syndromes from
 * the merged AFSR value, builds a spitf_async_flt describing the event,
 * scrubs the offending location, and queues the error for logging (unless
 * it is a scrubber-induced CE on a retired page).  Re-enables error
 * trapping before returning.
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;		/* cleared to suppress errorq dispatch */

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bit not set in the AFSR",
			"  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			page_t *pp = page_numtopp_nolock((pfn_t)
			    (ecc->flt_addr >> MMU_PAGESHIFT));

			if (pp != NULL && page_isretired(pp)) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bits not set in the UDBs",
			" Corrected Memory Error on");
	}

	/* Upper data buffer reported the CE: scrub using the upper syndrome */
	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/* Lower data buffer reported the CE: scrub using the lower syndrome */
	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}
9390Sstevel@tonic-gate 
9400Sstevel@tonic-gate /*
9410Sstevel@tonic-gate  * Cpu specific CE logging routine
9420Sstevel@tonic-gate  */
9430Sstevel@tonic-gate static void
9440Sstevel@tonic-gate log_ce_err(struct async_flt *aflt, char *unum)
9450Sstevel@tonic-gate {
9460Sstevel@tonic-gate 	spitf_async_flt spf_flt;
9470Sstevel@tonic-gate 
9480Sstevel@tonic-gate 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
9490Sstevel@tonic-gate 		return;
9500Sstevel@tonic-gate 	}
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate 	spf_flt.cmn_asyncflt = *aflt;
9530Sstevel@tonic-gate 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
9540Sstevel@tonic-gate 	    " Corrected Memory Error detected by");
9550Sstevel@tonic-gate }
9560Sstevel@tonic-gate 
/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	/* 0 => no recirculation; deliver the event through the normal path */
	return (0);
}
9670Sstevel@tonic-gate 
9680Sstevel@tonic-gate char *
9690Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt)
9700Sstevel@tonic-gate {
9710Sstevel@tonic-gate 	if (aflt->flt_status & ECC_INTERMITTENT)
9720Sstevel@tonic-gate 		return (ERR_TYPE_DESC_INTERMITTENT);
9730Sstevel@tonic-gate 	if (aflt->flt_status & ECC_PERSISTENT)
9740Sstevel@tonic-gate 		return (ERR_TYPE_DESC_PERSISTENT);
9750Sstevel@tonic-gate 	if (aflt->flt_status & ECC_STICKY)
9760Sstevel@tonic-gate 		return (ERR_TYPE_DESC_STICKY);
9770Sstevel@tonic-gate 	return (ERR_TYPE_DESC_UNKNOWN);
9780Sstevel@tonic-gate }
9790Sstevel@tonic-gate 
9800Sstevel@tonic-gate /*
9810Sstevel@tonic-gate  * Called by correctable ecc error logging code to print out
9820Sstevel@tonic-gate  * the stick/persistent/intermittent status of the error.
9830Sstevel@tonic-gate  */
9840Sstevel@tonic-gate static void
9850Sstevel@tonic-gate cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
9860Sstevel@tonic-gate {
9870Sstevel@tonic-gate 	ushort_t status;
9880Sstevel@tonic-gate 	char *status1_str = "Memory";
9890Sstevel@tonic-gate 	char *status2_str = "Intermittent";
9900Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
9910Sstevel@tonic-gate 
9920Sstevel@tonic-gate 	status = aflt->flt_status;
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate 	if (status & ECC_ECACHE)
9950Sstevel@tonic-gate 		status1_str = "Ecache";
9960Sstevel@tonic-gate 
9970Sstevel@tonic-gate 	if (status & ECC_STICKY)
9980Sstevel@tonic-gate 		status2_str = "Sticky";
9990Sstevel@tonic-gate 	else if (status & ECC_PERSISTENT)
10000Sstevel@tonic-gate 		status2_str = "Persistent";
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
10030Sstevel@tonic-gate 		NULL, " Corrected %s Error on %s is %s",
10040Sstevel@tonic-gate 		status1_str, unum, status2_str);
10050Sstevel@tonic-gate }
10060Sstevel@tonic-gate 
/*
 * check for a valid ce syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent. Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 *
 * On return, ecc->flt_status has exactly one of ECC_INTERMITTENT,
 * ECC_PERSISTENT or ECC_STICKY set (unless the address turned out to be
 * invalid, in which case flt_status is left unchanged).
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address. To deal with this, we disable
	 * NCEEN while we scrub that address. If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * To check if the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	/* Displacement-flush the cache line containing flt_addr. */
	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
		    set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	/* Re-enable NCEEN (but not CEEN) while we re-probe the location. */
	if (eer & EER_NCEEN)
	    set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	/* Restore the caller's error-enable state. */
	if (eer & (EER_CEEN | EER_NCEEN))
	    set_error_enable(eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}
10840Sstevel@tonic-gate 
/*
 * get the syndrome and unum, and then call the routines
 * to check the other cpus and iobuses, and then do the error logging.
 *
 * Also retires the faulting page when the per-unum CE count indicates it
 * is failing (and automatic_page_removal is enabled), and decodes the
 * ECC syndrome into a human-readable bit description for verbose logging.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 */
	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
	    automatic_page_removal) {
		page_t *pp = page_numtopp_nolock((pfn_t)
		    (ecc->flt_addr >> MMU_PAGESHIFT));

		if (pp) {
			/* Mark the page faulty and retire it. */
			page_settoxic(pp, PAGE_IS_FAULTY);
			(void) page_retire(pp, PAGE_IS_FAILING);
		}
	}

	/*
	 * Decide whether to emit the detailed syndrome decode below:
	 * always for panics and non-memory errors; for memory CEs only
	 * when ce_verbose_memory is set.
	 */
	if (ecc->flt_panic) {
		ce_verbose = 1;
	} else if ((ecc->flt_class == BUS_FAULT) ||
	    (ecc->flt_stat & P_AFSR_CE)) {
		ce_verbose = (ce_verbose_memory > 0);
	} else {
		ce_verbose = 1;
	}

	if (ce_verbose) {
		spitf_async_flt sflt;
		int synd_code;

		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&sflt, unum);

		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
				SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", synd_code);
		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
			/* synd_code - C0 converts to a check-bit index */
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", synd_code - C0);
		} else {
			/*
			 * These are UE errors - we shouldn't be getting CE
			 * traps for these; handle them in case of bad h/w.
			 */
			switch (synd_code) {
			case M2:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Two ECC Bits were in error");
				break;
			case M3:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Three ECC Bits were in error");
				break;
			case M4:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Four ECC Bits were in error");
				break;
			case MX:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " More than Four ECC bits were "
				    "in error");
				break;
			default:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Unknown fault syndrome %d",
				    synd_code);
				break;
			}
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate /*
11920Sstevel@tonic-gate  * We route all errors through a single switch statement.
11930Sstevel@tonic-gate  */
11940Sstevel@tonic-gate void
11950Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
11960Sstevel@tonic-gate {
11970Sstevel@tonic-gate 
11980Sstevel@tonic-gate 	switch (aflt->flt_class) {
11990Sstevel@tonic-gate 	case CPU_FAULT:
12000Sstevel@tonic-gate 		cpu_async_log_err(aflt);
12010Sstevel@tonic-gate 		break;
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate 	case BUS_FAULT:
12040Sstevel@tonic-gate 		bus_async_log_err(aflt);
12050Sstevel@tonic-gate 		break;
12060Sstevel@tonic-gate 
12070Sstevel@tonic-gate 	default:
12080Sstevel@tonic-gate 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
12090Sstevel@tonic-gate 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
12100Sstevel@tonic-gate 		break;
12110Sstevel@tonic-gate 	}
12120Sstevel@tonic-gate }
12130Sstevel@tonic-gate 
12140Sstevel@tonic-gate /* Values for action variable in cpu_async_error() */
12150Sstevel@tonic-gate #define	ACTION_NONE		0
12160Sstevel@tonic-gate #define	ACTION_TRAMPOLINE	1
12170Sstevel@tonic-gate #define	ACTION_AST_FLAGS	2
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate /*
12200Sstevel@tonic-gate  * Access error trap handler for asynchronous cpu errors.  This routine is
12210Sstevel@tonic-gate  * called to handle a data or instruction access error.  All fatal errors are
12220Sstevel@tonic-gate  * completely handled by this routine (by panicking).  Non fatal error logging
12230Sstevel@tonic-gate  * is queued for later processing either via AST or softint at a lower PIL.
12240Sstevel@tonic-gate  * In case of panic, the error log queue will also be processed as part of the
12250Sstevel@tonic-gate  * panic flow to ensure all errors are logged.  This routine is called with all
12260Sstevel@tonic-gate  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
12270Sstevel@tonic-gate  * error bits are also cleared.  The hardware has also disabled the I and
12280Sstevel@tonic-gate  * D-caches for us, so we must re-enable them before returning.
12290Sstevel@tonic-gate  *
12300Sstevel@tonic-gate  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
12310Sstevel@tonic-gate  *
12320Sstevel@tonic-gate  *		_______________________________________________________________
12330Sstevel@tonic-gate  *		|        Privileged tl0		|         Unprivileged	      |
12340Sstevel@tonic-gate  *		| Protected	| Unprotected	| Protected	| Unprotected |
12350Sstevel@tonic-gate  *		|on_trap|lofault|		|		|	      |
12360Sstevel@tonic-gate  * -------------|-------|-------+---------------+---------------+-------------|
12370Sstevel@tonic-gate  *		|	|	|		|		|	      |
12380Sstevel@tonic-gate  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
12390Sstevel@tonic-gate  *		|	|	|		|		|	      |
12400Sstevel@tonic-gate  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
12410Sstevel@tonic-gate  *		|	|	|		|		|	      |
12420Sstevel@tonic-gate  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
12430Sstevel@tonic-gate  *		|	|	|		|		|	      |
12440Sstevel@tonic-gate  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
12450Sstevel@tonic-gate  * ____________________________________________________________________________
12460Sstevel@tonic-gate  *
12470Sstevel@tonic-gate  *
12480Sstevel@tonic-gate  * Action codes:
12490Sstevel@tonic-gate  *
12500Sstevel@tonic-gate  * L - log
12510Sstevel@tonic-gate  * M - kick off memscrubber if flt_in_memory
12520Sstevel@tonic-gate  * P - panic
12530Sstevel@tonic-gate  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
12540Sstevel@tonic-gate  * R - i)  if aft_panic is set, panic
12550Sstevel@tonic-gate  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
12560Sstevel@tonic-gate  * S - send SIGBUS to process
12570Sstevel@tonic-gate  * T - trampoline
12580Sstevel@tonic-gate  *
12590Sstevel@tonic-gate  * Special cases:
12600Sstevel@tonic-gate  *
12610Sstevel@tonic-gate  * 1) if aft_testfatal is set, all faults result in a panic regardless
12620Sstevel@tonic-gate  *    of type (even WP), protection (even on_trap), or privilege.
12630Sstevel@tonic-gate  */
12640Sstevel@tonic-gate /*ARGSUSED*/
12650Sstevel@tonic-gate void
12660Sstevel@tonic-gate cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
12670Sstevel@tonic-gate 	uint_t p_afsr_high, uint_t p_afar_high)
12680Sstevel@tonic-gate {
12690Sstevel@tonic-gate 	ushort_t sdbh, sdbl, ttype, tl;
12700Sstevel@tonic-gate 	spitf_async_flt spf_flt;
12710Sstevel@tonic-gate 	struct async_flt *aflt;
12720Sstevel@tonic-gate 	char pr_reason[28];
12730Sstevel@tonic-gate 	uint64_t oafsr;
12740Sstevel@tonic-gate 	uint64_t acc_afsr = 0;			/* accumulated afsr */
12750Sstevel@tonic-gate 	int action = ACTION_NONE;
12760Sstevel@tonic-gate 	uint64_t t_afar = p_afar;
12770Sstevel@tonic-gate 	uint64_t t_afsr = p_afsr;
12780Sstevel@tonic-gate 	int expected = DDI_FM_ERR_UNEXPECTED;
12790Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate 	/*
12820Sstevel@tonic-gate 	 * We need to look at p_flag to determine if the thread detected an
12830Sstevel@tonic-gate 	 * error while dumping core.  We can't grab p_lock here, but it's ok
12840Sstevel@tonic-gate 	 * because we just need a consistent snapshot and we know that everyone
12850Sstevel@tonic-gate 	 * else will store a consistent set of bits while holding p_lock.  We
12860Sstevel@tonic-gate 	 * don't have to worry about a race because SDOCORE is set once prior
12870Sstevel@tonic-gate 	 * to doing i/o from the process's address space and is never cleared.
12880Sstevel@tonic-gate 	 */
12890Sstevel@tonic-gate 	uint_t pflag = ttoproc(curthread)->p_flag;
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	pr_reason[0] = '\0';
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate 	/*
12940Sstevel@tonic-gate 	 * Note: the Spitfire data buffer error registers
12950Sstevel@tonic-gate 	 * (upper and lower halves) are or'ed into the upper
12960Sstevel@tonic-gate 	 * word of the afsr by async_err() if P_AFSR_UE is set.
12970Sstevel@tonic-gate 	 */
12980Sstevel@tonic-gate 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
12990Sstevel@tonic-gate 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 	/*
13020Sstevel@tonic-gate 	 * Grab the ttype encoded in <63:53> of the saved
13030Sstevel@tonic-gate 	 * afsr passed from async_err()
13040Sstevel@tonic-gate 	 */
13050Sstevel@tonic-gate 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
13060Sstevel@tonic-gate 	tl = (ushort_t)(t_afsr >> 62);
13070Sstevel@tonic-gate 
13080Sstevel@tonic-gate 	t_afsr &= S_AFSR_MASK;
13090Sstevel@tonic-gate 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate 	/*
13120Sstevel@tonic-gate 	 * Initialize most of the common and CPU-specific structure.  We derive
13130Sstevel@tonic-gate 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
13140Sstevel@tonic-gate 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
13150Sstevel@tonic-gate 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
13160Sstevel@tonic-gate 	 * tuneable aft_testfatal is set (not the default).
13170Sstevel@tonic-gate 	 */
13180Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
13190Sstevel@tonic-gate 	aflt = (struct async_flt *)&spf_flt;
13200Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
13210Sstevel@tonic-gate 	aflt->flt_stat = t_afsr;
13220Sstevel@tonic-gate 	aflt->flt_addr = t_afar;
13230Sstevel@tonic-gate 	aflt->flt_bus_id = getprocessorid();
13240Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
13250Sstevel@tonic-gate 	aflt->flt_pc = (caddr_t)rp->r_pc;
13260Sstevel@tonic-gate 	aflt->flt_prot = AFLT_PROT_NONE;
13270Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
13280Sstevel@tonic-gate 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
13290Sstevel@tonic-gate 	aflt->flt_tl = (uchar_t)tl;
13300Sstevel@tonic-gate 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
13310Sstevel@tonic-gate 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate 	/*
13340Sstevel@tonic-gate 	 * Set flt_status based on the trap type.  If we end up here as the
13350Sstevel@tonic-gate 	 * result of a UE detected by the CE handling code, leave status 0.
13360Sstevel@tonic-gate 	 */
13370Sstevel@tonic-gate 	switch (ttype) {
13380Sstevel@tonic-gate 	case T_DATA_ERROR:
13390Sstevel@tonic-gate 		aflt->flt_status = ECC_D_TRAP;
13400Sstevel@tonic-gate 		break;
13410Sstevel@tonic-gate 	case T_INSTR_ERROR:
13420Sstevel@tonic-gate 		aflt->flt_status = ECC_I_TRAP;
13430Sstevel@tonic-gate 		break;
13440Sstevel@tonic-gate 	}
13450Sstevel@tonic-gate 
13460Sstevel@tonic-gate 	spf_flt.flt_sdbh = sdbh;
13470Sstevel@tonic-gate 	spf_flt.flt_sdbl = sdbl;
13480Sstevel@tonic-gate 
13490Sstevel@tonic-gate 	/*
13500Sstevel@tonic-gate 	 * Check for fatal async errors.
13510Sstevel@tonic-gate 	 */
13520Sstevel@tonic-gate 	check_misc_err(&spf_flt);
13530Sstevel@tonic-gate 
13540Sstevel@tonic-gate 	/*
13550Sstevel@tonic-gate 	 * If the trap occurred in privileged mode at TL=0, we need to check to
13560Sstevel@tonic-gate 	 * see if we were executing in the kernel under on_trap() or t_lofault
13570Sstevel@tonic-gate 	 * protection.  If so, modify the saved registers so that we return
13580Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine.
13590Sstevel@tonic-gate 	 */
13600Sstevel@tonic-gate 	if (aflt->flt_priv && tl == 0) {
13610Sstevel@tonic-gate 		if (curthread->t_ontrap != NULL) {
13620Sstevel@tonic-gate 			on_trap_data_t *otp = curthread->t_ontrap;
13630Sstevel@tonic-gate 
13640Sstevel@tonic-gate 			if (otp->ot_prot & OT_DATA_EC) {
13650Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_EC;
13660Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_EC;
13670Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
13680Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
13690Sstevel@tonic-gate 				action = ACTION_TRAMPOLINE;
13700Sstevel@tonic-gate 			}
13710Sstevel@tonic-gate 
13720Sstevel@tonic-gate 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
13730Sstevel@tonic-gate 			    (otp->ot_prot & OT_DATA_ACCESS)) {
13740Sstevel@tonic-gate 				aflt->flt_prot = AFLT_PROT_ACCESS;
13750Sstevel@tonic-gate 				otp->ot_trap |= OT_DATA_ACCESS;
13760Sstevel@tonic-gate 				rp->r_pc = otp->ot_trampoline;
13770Sstevel@tonic-gate 				rp->r_npc = rp->r_pc + 4;
13780Sstevel@tonic-gate 				action = ACTION_TRAMPOLINE;
13790Sstevel@tonic-gate 				/*
13800Sstevel@tonic-gate 				 * for peeks and caut_gets errors are expected
13810Sstevel@tonic-gate 				 */
13820Sstevel@tonic-gate 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
13830Sstevel@tonic-gate 				if (!hp)
13840Sstevel@tonic-gate 					expected = DDI_FM_ERR_PEEK;
13850Sstevel@tonic-gate 				else if (hp->ah_acc.devacc_attr_access ==
13860Sstevel@tonic-gate 				    DDI_CAUTIOUS_ACC)
13870Sstevel@tonic-gate 					expected = DDI_FM_ERR_EXPECTED;
13880Sstevel@tonic-gate 			}
13890Sstevel@tonic-gate 
13900Sstevel@tonic-gate 		} else if (curthread->t_lofault) {
13910Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_COPY;
13920Sstevel@tonic-gate 			rp->r_g1 = EFAULT;
13930Sstevel@tonic-gate 			rp->r_pc = curthread->t_lofault;
13940Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
13950Sstevel@tonic-gate 			action = ACTION_TRAMPOLINE;
13960Sstevel@tonic-gate 		}
13970Sstevel@tonic-gate 	}
13980Sstevel@tonic-gate 
13990Sstevel@tonic-gate 	/*
14000Sstevel@tonic-gate 	 * Determine if this error needs to be treated as fatal.  Note that
14010Sstevel@tonic-gate 	 * multiple errors detected upon entry to this trap handler does not
14020Sstevel@tonic-gate 	 * necessarily warrant a panic.  We only want to panic if the trap
14030Sstevel@tonic-gate 	 * happened in privileged mode and not under t_ontrap or t_lofault
14040Sstevel@tonic-gate 	 * protection.  The exception is WP: if we *only* get WP, it is not
14050Sstevel@tonic-gate 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
14060Sstevel@tonic-gate 	 *
14070Sstevel@tonic-gate 	 * aft_panic, if set, effectively makes us treat usermode
14080Sstevel@tonic-gate  * UE/EDP/LDP faults as if they were privileged - so we will
14090Sstevel@tonic-gate 	 * panic instead of sending a contract event.  A lofault-protected
14100Sstevel@tonic-gate 	 * fault will normally follow the contract event; if aft_panic is
14110Sstevel@tonic-gate 	 * set this will be changed to a panic.
14120Sstevel@tonic-gate 	 *
14130Sstevel@tonic-gate 	 * For usermode BERR/BTO errors, eg from processes performing device
14140Sstevel@tonic-gate 	 * control through mapped device memory, we need only deliver
14150Sstevel@tonic-gate 	 * a SIGBUS to the offending process.
14160Sstevel@tonic-gate 	 *
14170Sstevel@tonic-gate 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
14180Sstevel@tonic-gate 	 * checked later; for now we implement the common reasons.
14190Sstevel@tonic-gate 	 */
14200Sstevel@tonic-gate 	if (aflt->flt_prot == AFLT_PROT_NONE) {
14210Sstevel@tonic-gate 		/*
14220Sstevel@tonic-gate 		 * Beware - multiple bits may be set in AFSR
14230Sstevel@tonic-gate 		 */
14240Sstevel@tonic-gate 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
14250Sstevel@tonic-gate 			if (aflt->flt_priv || aft_panic)
14260Sstevel@tonic-gate 				aflt->flt_panic = 1;
14270Sstevel@tonic-gate 		}
14280Sstevel@tonic-gate 
14290Sstevel@tonic-gate 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
14300Sstevel@tonic-gate 			if (aflt->flt_priv)
14310Sstevel@tonic-gate 				aflt->flt_panic = 1;
14320Sstevel@tonic-gate 		}
14330Sstevel@tonic-gate 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
14340Sstevel@tonic-gate 		aflt->flt_panic = 1;
14350Sstevel@tonic-gate 	}
14360Sstevel@tonic-gate 
14370Sstevel@tonic-gate 	/*
14380Sstevel@tonic-gate 	 * UE/BERR/TO: Call our bus nexus friends to check for
14390Sstevel@tonic-gate 	 * IO errors that may have resulted in this trap.
14400Sstevel@tonic-gate 	 */
14410Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
14420Sstevel@tonic-gate 		cpu_run_bus_error_handlers(aflt, expected);
14430Sstevel@tonic-gate 	}
14440Sstevel@tonic-gate 
14450Sstevel@tonic-gate 	/*
14460Sstevel@tonic-gate 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
14470Sstevel@tonic-gate 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
14480Sstevel@tonic-gate 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
14490Sstevel@tonic-gate 	 * caches may introduce more parity errors (especially when the module
14500Sstevel@tonic-gate 	 * is bad) and in sabre there is no guarantee that such errors
14510Sstevel@tonic-gate 	 * (if introduced) are written back as poisoned data.
14520Sstevel@tonic-gate 	 */
14530Sstevel@tonic-gate 	if (t_afsr & P_AFSR_UE) {
14540Sstevel@tonic-gate 		int i;
14550Sstevel@tonic-gate 
14560Sstevel@tonic-gate 		(void) strcat(pr_reason, "UE ");
14570Sstevel@tonic-gate 
14580Sstevel@tonic-gate 		spf_flt.flt_type = CPU_UE_ERR;
14590Sstevel@tonic-gate 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
14600Sstevel@tonic-gate 			MMU_PAGESHIFT)) ? 1: 0;
14610Sstevel@tonic-gate 
14620Sstevel@tonic-gate 		/*
14630Sstevel@tonic-gate 		 * With UE, we have the PA of the fault.
14640Sstevel@tonic-gate 		 * Let do a diagnostic read to get the ecache
14650Sstevel@tonic-gate 		 * data and tag info of the bad line for logging.
14660Sstevel@tonic-gate 		 */
14670Sstevel@tonic-gate 		if (aflt->flt_in_memory) {
14680Sstevel@tonic-gate 			uint32_t ec_set_size;
14690Sstevel@tonic-gate 			uchar_t state;
14700Sstevel@tonic-gate 			uint32_t ecache_idx;
14710Sstevel@tonic-gate 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
14720Sstevel@tonic-gate 
14730Sstevel@tonic-gate 			/* touch the line to put it in ecache */
14740Sstevel@tonic-gate 			acc_afsr |= read_and_clear_afsr();
14750Sstevel@tonic-gate 			(void) lddphys(faultpa);
14760Sstevel@tonic-gate 			acc_afsr |= (read_and_clear_afsr() &
14770Sstevel@tonic-gate 				    ~(P_AFSR_EDP | P_AFSR_UE));
14780Sstevel@tonic-gate 
14790Sstevel@tonic-gate 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
14800Sstevel@tonic-gate 			    ecache_associativity;
14810Sstevel@tonic-gate 
14820Sstevel@tonic-gate 			for (i = 0; i < ecache_associativity; i++) {
14830Sstevel@tonic-gate 				ecache_idx = i * ec_set_size +
14840Sstevel@tonic-gate 				    (aflt->flt_addr % ec_set_size);
14850Sstevel@tonic-gate 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
14860Sstevel@tonic-gate 					(uint64_t *)&spf_flt.flt_ec_data[0],
14870Sstevel@tonic-gate 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
14880Sstevel@tonic-gate 				acc_afsr |= oafsr;
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 				state = (uchar_t)((spf_flt.flt_ec_tag &
14910Sstevel@tonic-gate 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
14920Sstevel@tonic-gate 
14930Sstevel@tonic-gate 				if ((state & cpu_ec_state_valid) &&
14940Sstevel@tonic-gate 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
14950Sstevel@tonic-gate 				    ((uint64_t)aflt->flt_addr >>
14960Sstevel@tonic-gate 				    cpu_ec_tag_shift)))
14970Sstevel@tonic-gate 					break;
14980Sstevel@tonic-gate 			}
14990Sstevel@tonic-gate 
15000Sstevel@tonic-gate 			/*
15010Sstevel@tonic-gate 			 * Check to see if the ecache tag is valid for the
15020Sstevel@tonic-gate 			 * fault PA. In the very unlikely event where the
15030Sstevel@tonic-gate 			 * line could be victimized, no ecache info will be
15040Sstevel@tonic-gate 			 * available. If this is the case, capture the line
15050Sstevel@tonic-gate 			 * from memory instead.
15060Sstevel@tonic-gate 			 */
15070Sstevel@tonic-gate 			if ((state & cpu_ec_state_valid) == 0 ||
15080Sstevel@tonic-gate 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
15090Sstevel@tonic-gate 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
15100Sstevel@tonic-gate 				for (i = 0; i < 8; i++, faultpa += 8) {
15110Sstevel@tonic-gate 				    ec_data_t *ecdptr;
15120Sstevel@tonic-gate 
15130Sstevel@tonic-gate 					ecdptr = &spf_flt.flt_ec_data[i];
15140Sstevel@tonic-gate 					acc_afsr |= read_and_clear_afsr();
15150Sstevel@tonic-gate 					ecdptr->ec_d8 = lddphys(faultpa);
15160Sstevel@tonic-gate 					acc_afsr |= (read_and_clear_afsr() &
15170Sstevel@tonic-gate 						    ~(P_AFSR_EDP | P_AFSR_UE));
15180Sstevel@tonic-gate 					ecdptr->ec_afsr = 0;
15190Sstevel@tonic-gate 							/* null afsr value */
15200Sstevel@tonic-gate 				}
15210Sstevel@tonic-gate 
15220Sstevel@tonic-gate 				/*
15230Sstevel@tonic-gate 				 * Mark tag invalid to indicate mem dump
15240Sstevel@tonic-gate 				 * when we print out the info.
15250Sstevel@tonic-gate 				 */
15260Sstevel@tonic-gate 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
15270Sstevel@tonic-gate 			}
15280Sstevel@tonic-gate 			spf_flt.flt_ec_lcnt = 1;
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 			/*
15310Sstevel@tonic-gate 			 * Flush out the bad line
15320Sstevel@tonic-gate 			 */
15330Sstevel@tonic-gate 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
15340Sstevel@tonic-gate 				cpunodes[CPU->cpu_id].ecache_size);
15350Sstevel@tonic-gate 
15360Sstevel@tonic-gate 			acc_afsr |= clear_errors(NULL, NULL);
15370Sstevel@tonic-gate 		}
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate 		/*
15400Sstevel@tonic-gate 		 * Ask our bus nexus friends if they have any fatal errors. If
15410Sstevel@tonic-gate 		 * so, they will log appropriate error messages and panic as a
15420Sstevel@tonic-gate 		 * result. We then queue an event for each UDB that reports a
15430Sstevel@tonic-gate 		 * UE. Each UE reported in a UDB will have its own log message.
15440Sstevel@tonic-gate 		 *
15450Sstevel@tonic-gate 		 * Note from kbn: In the case where there are multiple UEs
15460Sstevel@tonic-gate 		 * (ME bit is set) - the AFAR address is only accurate to
15470Sstevel@tonic-gate 		 * the 16-byte granularity. One cannot tell whether the AFAR
15480Sstevel@tonic-gate 		 * belongs to the UDBH or UDBL syndromes. In this case, we
15490Sstevel@tonic-gate 		 * always report the AFAR address to be 16-byte aligned.
15500Sstevel@tonic-gate 		 *
15510Sstevel@tonic-gate 		 * If we're on a Sabre, there is no SDBL, but it will always
15520Sstevel@tonic-gate 		 * read as zero, so the sdbl test below will safely fail.
15530Sstevel@tonic-gate 		 */
15540Sstevel@tonic-gate 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
15550Sstevel@tonic-gate 			aflt->flt_panic = 1;
15560Sstevel@tonic-gate 
15570Sstevel@tonic-gate 		if (sdbh & P_DER_UE) {
15580Sstevel@tonic-gate 			aflt->flt_synd = sdbh & P_DER_E_SYND;
15590Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
15600Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
15610Sstevel@tonic-gate 			    aflt->flt_panic);
15620Sstevel@tonic-gate 		}
15630Sstevel@tonic-gate 		if (sdbl & P_DER_UE) {
15640Sstevel@tonic-gate 			aflt->flt_synd = sdbl & P_DER_E_SYND;
15650Sstevel@tonic-gate 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
15660Sstevel@tonic-gate 			if (!(aflt->flt_stat & P_AFSR_ME))
15670Sstevel@tonic-gate 				aflt->flt_addr |= 0x8;
15680Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
15690Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
15700Sstevel@tonic-gate 			    aflt->flt_panic);
15710Sstevel@tonic-gate 		}
15720Sstevel@tonic-gate 
15730Sstevel@tonic-gate 		/*
15740Sstevel@tonic-gate 		 * We got a UE and are panicking, save the fault PA in a known
15750Sstevel@tonic-gate 		 * location so that the platform specific panic code can check
15760Sstevel@tonic-gate 		 * for copyback errors.
15770Sstevel@tonic-gate 		 */
15780Sstevel@tonic-gate 		if (aflt->flt_panic && aflt->flt_in_memory) {
15790Sstevel@tonic-gate 			panic_aflt = *aflt;
15800Sstevel@tonic-gate 		}
15810Sstevel@tonic-gate 	}
15820Sstevel@tonic-gate 
15830Sstevel@tonic-gate 	/*
15840Sstevel@tonic-gate 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
15850Sstevel@tonic-gate 	 * async error for logging. For Sabre, we panic on EDP or LDP.
15860Sstevel@tonic-gate 	 */
15870Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
15880Sstevel@tonic-gate 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 		if (t_afsr & P_AFSR_EDP)
15910Sstevel@tonic-gate 			(void) strcat(pr_reason, "EDP ");
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 		if (t_afsr & P_AFSR_LDP)
15940Sstevel@tonic-gate 			(void) strcat(pr_reason, "LDP ");
15950Sstevel@tonic-gate 
15960Sstevel@tonic-gate 		/*
15970Sstevel@tonic-gate 		 * Here we have no PA to work with.
15980Sstevel@tonic-gate 		 * Scan each line in the ecache to look for
15990Sstevel@tonic-gate 		 * the one with bad parity.
16000Sstevel@tonic-gate 		 */
16010Sstevel@tonic-gate 		aflt->flt_addr = AFLT_INV_ADDR;
16020Sstevel@tonic-gate 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
16030Sstevel@tonic-gate 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
16040Sstevel@tonic-gate 		acc_afsr |= (oafsr & ~P_AFSR_WP);
16050Sstevel@tonic-gate 
16060Sstevel@tonic-gate 		/*
16070Sstevel@tonic-gate 		 * If we found a bad PA, update the state to indicate if it is
16080Sstevel@tonic-gate 		 * memory or I/O space.  This code will be important if we ever
16090Sstevel@tonic-gate 		 * support cacheable frame buffers.
16100Sstevel@tonic-gate 		 */
16110Sstevel@tonic-gate 		if (aflt->flt_addr != AFLT_INV_ADDR) {
16120Sstevel@tonic-gate 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
16130Sstevel@tonic-gate 				MMU_PAGESHIFT)) ? 1 : 0;
16140Sstevel@tonic-gate 		}
16150Sstevel@tonic-gate 
16160Sstevel@tonic-gate 		if (isus2i || isus2e)
16170Sstevel@tonic-gate 			aflt->flt_panic = 1;
16180Sstevel@tonic-gate 
16190Sstevel@tonic-gate 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
16200Sstevel@tonic-gate 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
16210Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
16220Sstevel@tonic-gate 		    aflt->flt_panic);
16230Sstevel@tonic-gate 	}
16240Sstevel@tonic-gate 
16250Sstevel@tonic-gate 	/*
16260Sstevel@tonic-gate 	 * Timeout and bus error handling.  There are two cases to consider:
16270Sstevel@tonic-gate 	 *
16280Sstevel@tonic-gate 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
16290Sstevel@tonic-gate 	 * have already modified the saved registers so that we will return
16300Sstevel@tonic-gate 	 * from the trap to the appropriate trampoline routine; otherwise panic.
16310Sstevel@tonic-gate 	 *
16320Sstevel@tonic-gate 	 * (2) In user mode, we can simply use our AST mechanism to deliver
16330Sstevel@tonic-gate 	 * a SIGBUS.  We do not log the occurrence - processes performing
16340Sstevel@tonic-gate 	 * device control would generate lots of uninteresting messages.
16350Sstevel@tonic-gate 	 */
16360Sstevel@tonic-gate 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
16370Sstevel@tonic-gate 		if (t_afsr & P_AFSR_TO)
16380Sstevel@tonic-gate 			(void) strcat(pr_reason, "BTO ");
16390Sstevel@tonic-gate 
16400Sstevel@tonic-gate 		if (t_afsr & P_AFSR_BERR)
16410Sstevel@tonic-gate 			(void) strcat(pr_reason, "BERR ");
16420Sstevel@tonic-gate 
16430Sstevel@tonic-gate 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
16440Sstevel@tonic-gate 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
16450Sstevel@tonic-gate 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
16460Sstevel@tonic-gate 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
16470Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
16480Sstevel@tonic-gate 			    aflt->flt_panic);
16490Sstevel@tonic-gate 		}
16500Sstevel@tonic-gate 	}
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 	/*
16530Sstevel@tonic-gate 	 * Handle WP: WP happens when the ecache is victimized and a parity
16540Sstevel@tonic-gate 	 * error was detected on a writeback.  The data in question will be
16550Sstevel@tonic-gate 	 * poisoned as a UE will be written back.  The PA is not logged and
16560Sstevel@tonic-gate 	 * it is possible that it doesn't belong to the trapped thread.  The
16570Sstevel@tonic-gate 	 * WP trap is not fatal, but it could be fatal to someone that
16580Sstevel@tonic-gate 	 * subsequently accesses the toxic page.  We set read_all_memscrub
16590Sstevel@tonic-gate 	 * to force the memscrubber to read all of memory when it awakens.
16600Sstevel@tonic-gate 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
16610Sstevel@tonic-gate 	 * UE back to poison the data.
16620Sstevel@tonic-gate 	 */
16630Sstevel@tonic-gate 	if (t_afsr & P_AFSR_WP) {
16640Sstevel@tonic-gate 		(void) strcat(pr_reason, "WP ");
16650Sstevel@tonic-gate 		if (isus2i || isus2e) {
16660Sstevel@tonic-gate 			aflt->flt_panic = 1;
16670Sstevel@tonic-gate 		} else {
16680Sstevel@tonic-gate 			read_all_memscrub = 1;
16690Sstevel@tonic-gate 		}
16700Sstevel@tonic-gate 		spf_flt.flt_type = CPU_WP_ERR;
16710Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
16720Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
16730Sstevel@tonic-gate 		    aflt->flt_panic);
16740Sstevel@tonic-gate 	}
16750Sstevel@tonic-gate 
16760Sstevel@tonic-gate 	/*
16770Sstevel@tonic-gate 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
16780Sstevel@tonic-gate 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
16790Sstevel@tonic-gate 	 * This is fatal.
16800Sstevel@tonic-gate 	 */
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	if (t_afsr & P_AFSR_CP) {
16830Sstevel@tonic-gate 		if (isus2i || isus2e) {
16840Sstevel@tonic-gate 			(void) strcat(pr_reason, "CP ");
16850Sstevel@tonic-gate 			aflt->flt_panic = 1;
16860Sstevel@tonic-gate 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
16870Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
16880Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
16890Sstevel@tonic-gate 			    aflt->flt_panic);
16900Sstevel@tonic-gate 		} else {
16910Sstevel@tonic-gate 			/*
16920Sstevel@tonic-gate 			 * Orphan CP: Happens due to signal integrity problem
16930Sstevel@tonic-gate 			 * on a CPU, where a CP is reported, without reporting
16940Sstevel@tonic-gate 			 * its associated UE. This is handled by locating the
16950Sstevel@tonic-gate 			 * bad parity line and would kick off the memscrubber
16960Sstevel@tonic-gate 			 * to find the UE if in memory or in another's cache.
16970Sstevel@tonic-gate 			 */
16980Sstevel@tonic-gate 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
16990Sstevel@tonic-gate 			(void) strcat(pr_reason, "ORPHAN_CP ");
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate 			/*
17020Sstevel@tonic-gate 			 * Here we have no PA to work with.
17030Sstevel@tonic-gate 			 * Scan each line in the ecache to look for
17040Sstevel@tonic-gate 			 * the one with bad parity.
17050Sstevel@tonic-gate 			 */
17060Sstevel@tonic-gate 			aflt->flt_addr = AFLT_INV_ADDR;
17070Sstevel@tonic-gate 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
17080Sstevel@tonic-gate 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
17090Sstevel@tonic-gate 				&oafsr);
17100Sstevel@tonic-gate 			acc_afsr |= oafsr;
17110Sstevel@tonic-gate 
17120Sstevel@tonic-gate 			/*
17130Sstevel@tonic-gate 			 * If we found a bad PA, update the state to indicate
17140Sstevel@tonic-gate 			 * if it is memory or I/O space.
17150Sstevel@tonic-gate 			 */
17160Sstevel@tonic-gate 			if (aflt->flt_addr != AFLT_INV_ADDR) {
17170Sstevel@tonic-gate 				aflt->flt_in_memory =
17180Sstevel@tonic-gate 					(pf_is_memory(aflt->flt_addr >>
17190Sstevel@tonic-gate 						MMU_PAGESHIFT)) ? 1 : 0;
17200Sstevel@tonic-gate 			}
17210Sstevel@tonic-gate 			read_all_memscrub = 1;
17220Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
17230Sstevel@tonic-gate 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
17240Sstevel@tonic-gate 			    aflt->flt_panic);
17250Sstevel@tonic-gate 
17260Sstevel@tonic-gate 		}
17270Sstevel@tonic-gate 	}
17280Sstevel@tonic-gate 
17290Sstevel@tonic-gate 	/*
17300Sstevel@tonic-gate 	 * If we queued an error other than WP or CP and we are going to return
17310Sstevel@tonic-gate 	 * from the trap and the error was in user mode or inside of a
17320Sstevel@tonic-gate 	 * copy routine, set AST flag so the queue will be drained before
17330Sstevel@tonic-gate 	 * returning to user mode.
17340Sstevel@tonic-gate 	 *
17350Sstevel@tonic-gate 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
17360Sstevel@tonic-gate 	 * and send an event to its process contract.
17370Sstevel@tonic-gate 	 *
17380Sstevel@tonic-gate 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
17390Sstevel@tonic-gate 	 * will have been no error queued in this case.
17400Sstevel@tonic-gate 	 */
17410Sstevel@tonic-gate 	if ((t_afsr &
17420Sstevel@tonic-gate 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
17430Sstevel@tonic-gate 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
17440Sstevel@tonic-gate 			int pcb_flag = 0;
17450Sstevel@tonic-gate 
17460Sstevel@tonic-gate 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
17470Sstevel@tonic-gate 				pcb_flag |= ASYNC_HWERR;
17480Sstevel@tonic-gate 
17490Sstevel@tonic-gate 			if (t_afsr & P_AFSR_BERR)
17500Sstevel@tonic-gate 				pcb_flag |= ASYNC_BERR;
17510Sstevel@tonic-gate 
17520Sstevel@tonic-gate 			if (t_afsr & P_AFSR_TO)
17530Sstevel@tonic-gate 				pcb_flag |= ASYNC_BTO;
17540Sstevel@tonic-gate 
17550Sstevel@tonic-gate 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
17560Sstevel@tonic-gate 			aston(curthread);
17570Sstevel@tonic-gate 			action = ACTION_AST_FLAGS;
17580Sstevel@tonic-gate 	}
17590Sstevel@tonic-gate 
17600Sstevel@tonic-gate 	/*
17610Sstevel@tonic-gate 	 * In response to a deferred error, we must do one of three things:
17620Sstevel@tonic-gate 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
17630Sstevel@tonic-gate 	 * set in cases (1) and (2) - check that either action is set or
17640Sstevel@tonic-gate 	 * (3) is true.
17650Sstevel@tonic-gate 	 *
17660Sstevel@tonic-gate 	 * On II, the WP writes poisoned data back to memory, which will
17670Sstevel@tonic-gate 	 * cause a UE and a panic or reboot when read.  In this case, we
17680Sstevel@tonic-gate 	 * don't need to panic at this time.  On IIi and IIe,
17690Sstevel@tonic-gate 	 * aflt->flt_panic is already set above.
17700Sstevel@tonic-gate 	 */
17710Sstevel@tonic-gate 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
17720Sstevel@tonic-gate 	    (t_afsr & P_AFSR_WP));
17730Sstevel@tonic-gate 
17740Sstevel@tonic-gate 	/*
17750Sstevel@tonic-gate 	 * Make a final sanity check to make sure we did not get any more async
17760Sstevel@tonic-gate 	 * errors and accumulate the afsr.
17770Sstevel@tonic-gate 	 */
17780Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
17790Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
17800Sstevel@tonic-gate 	(void) clear_errors(&spf_flt, NULL);
17810Sstevel@tonic-gate 
17820Sstevel@tonic-gate 	/*
17830Sstevel@tonic-gate 	 * Take care of a special case: If there is a UE in the ecache flush
17840Sstevel@tonic-gate 	 * area, we'll see it in flush_ecache().  This will trigger the
17850Sstevel@tonic-gate 	 * CPU_ADDITIONAL_ERRORS case below.
17860Sstevel@tonic-gate 	 *
17870Sstevel@tonic-gate 	 * This could occur if the original error was a UE in the flush area,
17880Sstevel@tonic-gate 	 * or if the original error was an E$ error that was flushed out of
17890Sstevel@tonic-gate 	 * the E$ in scan_ecache().
17900Sstevel@tonic-gate 	 *
17910Sstevel@tonic-gate 	 * If it's at the same address that we're already logging, then it's
17920Sstevel@tonic-gate 	 * probably one of these cases.  Clear the bit so we don't trip over
17930Sstevel@tonic-gate 	 * it on the additional errors case, which could cause an unnecessary
17940Sstevel@tonic-gate 	 * panic.
17950Sstevel@tonic-gate 	 */
17960Sstevel@tonic-gate 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
17970Sstevel@tonic-gate 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
17980Sstevel@tonic-gate 	else
17990Sstevel@tonic-gate 		acc_afsr |= aflt->flt_stat;
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 	/*
18020Sstevel@tonic-gate 	 * Check the accumulated afsr for the important bits.
18030Sstevel@tonic-gate 	 * Make sure the spf_flt.flt_type value is set, and
18040Sstevel@tonic-gate 	 * enqueue an error.
18050Sstevel@tonic-gate 	 */
18060Sstevel@tonic-gate 	if (acc_afsr &
18070Sstevel@tonic-gate 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
18080Sstevel@tonic-gate 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
18090Sstevel@tonic-gate 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
18100Sstevel@tonic-gate 		    P_AFSR_ISAP))
18110Sstevel@tonic-gate 			aflt->flt_panic = 1;
18120Sstevel@tonic-gate 
18130Sstevel@tonic-gate 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
18140Sstevel@tonic-gate 		aflt->flt_stat = acc_afsr;
18150Sstevel@tonic-gate 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
18160Sstevel@tonic-gate 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
18170Sstevel@tonic-gate 		    aflt->flt_panic);
18180Sstevel@tonic-gate 	}
18190Sstevel@tonic-gate 
18200Sstevel@tonic-gate 	/*
18210Sstevel@tonic-gate 	 * If aflt->flt_panic is set at this point, we need to panic as the
18220Sstevel@tonic-gate 	 * result of a trap at TL > 0, or an error we determined to be fatal.
18230Sstevel@tonic-gate 	 * We've already enqueued the error in one of the if-clauses above,
18240Sstevel@tonic-gate 	 * and it will be dequeued and logged as part of the panic flow.
18250Sstevel@tonic-gate 	 */
18260Sstevel@tonic-gate 	if (aflt->flt_panic) {
18270Sstevel@tonic-gate 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
18280Sstevel@tonic-gate 		    "See previous message(s) for details", " %sError(s)",
18290Sstevel@tonic-gate 		    pr_reason);
18300Sstevel@tonic-gate 	}
18310Sstevel@tonic-gate 
18320Sstevel@tonic-gate 	/*
18330Sstevel@tonic-gate 	 * Before returning, we must re-enable errors, and
18340Sstevel@tonic-gate 	 * reset the caches to their boot-up state.
18350Sstevel@tonic-gate 	 */
18360Sstevel@tonic-gate 	set_lsu(get_lsu() | cache_boot_state);
18370Sstevel@tonic-gate 	set_error_enable(EER_ENABLE);
18380Sstevel@tonic-gate }
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate /*
18410Sstevel@tonic-gate  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
18420Sstevel@tonic-gate  * This routine is shared by the CE and UE handling code.
18430Sstevel@tonic-gate  */
18440Sstevel@tonic-gate static void
18450Sstevel@tonic-gate check_misc_err(spitf_async_flt *spf_flt)
18460Sstevel@tonic-gate {
18470Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
18480Sstevel@tonic-gate 	char *fatal_str = NULL;
18490Sstevel@tonic-gate 
18500Sstevel@tonic-gate 	/*
18510Sstevel@tonic-gate 	 * The ISAP and ETP errors are supposed to cause a POR
18520Sstevel@tonic-gate 	 * from the system, so in theory we never, ever see these messages.
18530Sstevel@tonic-gate 	 * ISAP, ETP and IVUE are considered to be fatal.
18540Sstevel@tonic-gate 	 */
18550Sstevel@tonic-gate 	if (aflt->flt_stat & P_AFSR_ISAP)
18560Sstevel@tonic-gate 		fatal_str = " System Address Parity Error on";
18570Sstevel@tonic-gate 	else if (aflt->flt_stat & P_AFSR_ETP)
18580Sstevel@tonic-gate 		fatal_str = " Ecache Tag Parity Error on";
18590Sstevel@tonic-gate 	else if (aflt->flt_stat & P_AFSR_IVUE)
18600Sstevel@tonic-gate 		fatal_str = " Interrupt Vector Uncorrectable Error on";
18610Sstevel@tonic-gate 	if (fatal_str != NULL) {
18620Sstevel@tonic-gate 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
18630Sstevel@tonic-gate 			NULL, fatal_str);
18640Sstevel@tonic-gate 	}
18650Sstevel@tonic-gate }
18660Sstevel@tonic-gate 
18670Sstevel@tonic-gate /*
18680Sstevel@tonic-gate  * Routine to convert a syndrome into a syndrome code.
18690Sstevel@tonic-gate  */
18700Sstevel@tonic-gate static int
18710Sstevel@tonic-gate synd_to_synd_code(int synd_status, ushort_t synd)
18720Sstevel@tonic-gate {
18730Sstevel@tonic-gate 	if (synd_status != AFLT_STAT_VALID)
18740Sstevel@tonic-gate 		return (-1);
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	/*
18770Sstevel@tonic-gate 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
18780Sstevel@tonic-gate 	 * to get the code indicating which bit(s) is(are) bad.
18790Sstevel@tonic-gate 	 */
18800Sstevel@tonic-gate 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
18810Sstevel@tonic-gate 		return (-1);
18820Sstevel@tonic-gate 	else
18830Sstevel@tonic-gate 		return (ecc_syndrome_tab[synd]);
18840Sstevel@tonic-gate }
18850Sstevel@tonic-gate 
18860Sstevel@tonic-gate /*
18870Sstevel@tonic-gate  * Routine to return a string identifying the physical name
18880Sstevel@tonic-gate  * associated with a memory/cache error.
18890Sstevel@tonic-gate  */
18900Sstevel@tonic-gate /* ARGSUSED */
18910Sstevel@tonic-gate int
18920Sstevel@tonic-gate cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
18930Sstevel@tonic-gate     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
18940Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
18950Sstevel@tonic-gate {
18960Sstevel@tonic-gate 	short synd_code;
18970Sstevel@tonic-gate 	int ret;
18980Sstevel@tonic-gate 
18990Sstevel@tonic-gate 	if (flt_in_memory) {
19000Sstevel@tonic-gate 		synd_code = synd_to_synd_code(synd_status, synd);
19010Sstevel@tonic-gate 		if (synd_code == -1) {
19020Sstevel@tonic-gate 			ret = EINVAL;
19030Sstevel@tonic-gate 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
19040Sstevel@tonic-gate 		    buf, buflen, lenp) != 0) {
19050Sstevel@tonic-gate 			ret = EIO;
19060Sstevel@tonic-gate 		} else if (*lenp <= 1) {
19070Sstevel@tonic-gate 			ret = EINVAL;
19080Sstevel@tonic-gate 		} else {
19090Sstevel@tonic-gate 			ret = 0;
19100Sstevel@tonic-gate 		}
19110Sstevel@tonic-gate 	} else {
19120Sstevel@tonic-gate 		ret = ENOTSUP;
19130Sstevel@tonic-gate 	}
19140Sstevel@tonic-gate 
19150Sstevel@tonic-gate 	if (ret != 0) {
19160Sstevel@tonic-gate 		buf[0] = '\0';
19170Sstevel@tonic-gate 		*lenp = 0;
19180Sstevel@tonic-gate 	}
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate 	return (ret);
19210Sstevel@tonic-gate }
19220Sstevel@tonic-gate 
19230Sstevel@tonic-gate /*
19240Sstevel@tonic-gate  * Wrapper for cpu_get_mem_unum() routine that takes an
19250Sstevel@tonic-gate  * async_flt struct rather than explicit arguments.
19260Sstevel@tonic-gate  */
19270Sstevel@tonic-gate int
19280Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
19290Sstevel@tonic-gate     char *buf, int buflen, int *lenp)
19300Sstevel@tonic-gate {
19310Sstevel@tonic-gate 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
19320Sstevel@tonic-gate 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
19330Sstevel@tonic-gate 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
19340Sstevel@tonic-gate }
19350Sstevel@tonic-gate 
19360Sstevel@tonic-gate /*
19370Sstevel@tonic-gate  * This routine is a more generic interface to cpu_get_mem_unum(),
19380Sstevel@tonic-gate  * that may be used by other modules (e.g. mm).
19390Sstevel@tonic-gate  */
19400Sstevel@tonic-gate int
19410Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
19420Sstevel@tonic-gate 		char *buf, int buflen, int *lenp)
19430Sstevel@tonic-gate {
19440Sstevel@tonic-gate 	int synd_status, flt_in_memory, ret;
19450Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 	/*
19480Sstevel@tonic-gate 	 * Check for an invalid address.
19490Sstevel@tonic-gate 	 */
19500Sstevel@tonic-gate 	if (afar == (uint64_t)-1)
19510Sstevel@tonic-gate 		return (ENXIO);
19520Sstevel@tonic-gate 
19530Sstevel@tonic-gate 	if (synd == (uint64_t)-1)
19540Sstevel@tonic-gate 		synd_status = AFLT_STAT_INVALID;
19550Sstevel@tonic-gate 	else
19560Sstevel@tonic-gate 		synd_status = AFLT_STAT_VALID;
19570Sstevel@tonic-gate 
19580Sstevel@tonic-gate 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
19610Sstevel@tonic-gate 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
19620Sstevel@tonic-gate 	    != 0)
19630Sstevel@tonic-gate 		return (ret);
19640Sstevel@tonic-gate 
19650Sstevel@tonic-gate 	if (*lenp >= buflen)
19660Sstevel@tonic-gate 		return (ENAMETOOLONG);
19670Sstevel@tonic-gate 
19680Sstevel@tonic-gate 	(void) strncpy(buf, unum, buflen);
19690Sstevel@tonic-gate 
19700Sstevel@tonic-gate 	return (0);
19710Sstevel@tonic-gate }
19720Sstevel@tonic-gate 
19730Sstevel@tonic-gate /*
19740Sstevel@tonic-gate  * Routine to return memory information associated
19750Sstevel@tonic-gate  * with a physical address and syndrome.
19760Sstevel@tonic-gate  */
19770Sstevel@tonic-gate /* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	/* memory-controller decode is not supported on this cpu module */
	return (ENOTSUP);
}
19850Sstevel@tonic-gate 
19860Sstevel@tonic-gate /*
19870Sstevel@tonic-gate  * Routine to return a string identifying the physical
19880Sstevel@tonic-gate  * name associated with a cpuid.
19890Sstevel@tonic-gate  */
19900Sstevel@tonic-gate /* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	/* CPU FRU naming is not supported on this cpu module */
	return (ENOTSUP);
}
19960Sstevel@tonic-gate 
19970Sstevel@tonic-gate /*
19980Sstevel@tonic-gate  * This routine returns the size of the kernel's FRU name buffer.
19990Sstevel@tonic-gate  */
20000Sstevel@tonic-gate size_t
20010Sstevel@tonic-gate cpu_get_name_bufsize()
20020Sstevel@tonic-gate {
20030Sstevel@tonic-gate 	return (UNUM_NAMLEN);
20040Sstevel@tonic-gate }
20050Sstevel@tonic-gate 
20060Sstevel@tonic-gate /*
20070Sstevel@tonic-gate  * Cpu specific log func for UEs.
20080Sstevel@tonic-gate  */
20090Sstevel@tonic-gate static void
20100Sstevel@tonic-gate log_ue_err(struct async_flt *aflt, char *unum)
20110Sstevel@tonic-gate {
20120Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
20130Sstevel@tonic-gate 	int len = 0;
20140Sstevel@tonic-gate 
20150Sstevel@tonic-gate #ifdef DEBUG
20160Sstevel@tonic-gate 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
20170Sstevel@tonic-gate 
20180Sstevel@tonic-gate 	/*
20190Sstevel@tonic-gate 	 * Paranoid Check for priv mismatch
20200Sstevel@tonic-gate 	 * Only applicable for UEs
20210Sstevel@tonic-gate 	 */
20220Sstevel@tonic-gate 	if (afsr_priv != aflt->flt_priv) {
20230Sstevel@tonic-gate 		/*
20240Sstevel@tonic-gate 		 * The priv bits in %tstate and %afsr did not match; we expect
20250Sstevel@tonic-gate 		 * this to be very rare, so flag it with a message.
20260Sstevel@tonic-gate 		 */
20270Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
20280Sstevel@tonic-gate 		    ": PRIV bit in TSTATE and AFSR mismatched; "
20290Sstevel@tonic-gate 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate 		/* update saved afsr to reflect the correct priv */
20320Sstevel@tonic-gate 		aflt->flt_stat &= ~P_AFSR_PRIV;
20330Sstevel@tonic-gate 		if (aflt->flt_priv)
20340Sstevel@tonic-gate 			aflt->flt_stat |= P_AFSR_PRIV;
20350Sstevel@tonic-gate 	}
20360Sstevel@tonic-gate #endif /* DEBUG */
20370Sstevel@tonic-gate 
20380Sstevel@tonic-gate 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
20390Sstevel@tonic-gate 	    UNUM_NAMLEN, &len);
20400Sstevel@tonic-gate 
20410Sstevel@tonic-gate 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
20420Sstevel@tonic-gate 	    " Uncorrectable Memory Error on");
20430Sstevel@tonic-gate 
20440Sstevel@tonic-gate 	if (SYND(aflt->flt_synd) == 0x3) {
20450Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
20460Sstevel@tonic-gate 		    " Syndrome 0x3 indicates that this may not be a "
20470Sstevel@tonic-gate 		    "memory module problem");
20480Sstevel@tonic-gate 	}
20490Sstevel@tonic-gate 
20500Sstevel@tonic-gate 	if (aflt->flt_in_memory)
20510Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
20520Sstevel@tonic-gate }
20530Sstevel@tonic-gate 
20540Sstevel@tonic-gate 
20550Sstevel@tonic-gate /*
20560Sstevel@tonic-gate  * The cpu_async_log_err() function is called via the ue_drain() function to
20570Sstevel@tonic-gate  * handle logging for CPU events that are dequeued.  As such, it can be invoked
20580Sstevel@tonic-gate  * from softint context, from AST processing in the trap() flow, or from the
20590Sstevel@tonic-gate  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
20600Sstevel@tonic-gate  */
20610Sstevel@tonic-gate static void
20620Sstevel@tonic-gate cpu_async_log_err(void *flt)
20630Sstevel@tonic-gate {
20640Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
20650Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)flt;
20660Sstevel@tonic-gate 	char unum[UNUM_NAMLEN];
20670Sstevel@tonic-gate 	char *space;
20680Sstevel@tonic-gate 	char *ecache_scrub_logstr = NULL;
20690Sstevel@tonic-gate 
20700Sstevel@tonic-gate 	switch (spf_flt->flt_type) {
20710Sstevel@tonic-gate 	    case CPU_UE_ERR:
20720Sstevel@tonic-gate 		/*
20730Sstevel@tonic-gate 		 * We want to skip logging only if ALL the following
20740Sstevel@tonic-gate 		 * conditions are true:
20750Sstevel@tonic-gate 		 *
20760Sstevel@tonic-gate 		 *	1. We are not panicking
20770Sstevel@tonic-gate 		 *	2. There is only one error
20780Sstevel@tonic-gate 		 *	3. That error is a memory error
20790Sstevel@tonic-gate 		 *	4. The error is caused by the memory scrubber (in
20800Sstevel@tonic-gate 		 *	   which case the error will have occurred under
20810Sstevel@tonic-gate 		 *	   on_trap protection)
20820Sstevel@tonic-gate 		 *	5. The error is on a retired page
20830Sstevel@tonic-gate 		 *
20840Sstevel@tonic-gate 		 * Note 1: AFLT_PROT_EC is used places other than the memory
20850Sstevel@tonic-gate 		 * scrubber.  However, none of those errors should occur
20860Sstevel@tonic-gate 		 * on a retired page.
20870Sstevel@tonic-gate 		 *
20880Sstevel@tonic-gate 		 * Note 2: In the CE case, these errors are discarded before
20890Sstevel@tonic-gate 		 * the errorq.  In the UE case, we must wait until now --
20900Sstevel@tonic-gate 		 * softcall() grabs a mutex, which we can't do at a high PIL.
20910Sstevel@tonic-gate 		 */
20920Sstevel@tonic-gate 		if (!panicstr &&
20930Sstevel@tonic-gate 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
20940Sstevel@tonic-gate 		    aflt->flt_prot == AFLT_PROT_EC) {
20950Sstevel@tonic-gate 			page_t *pp = page_numtopp_nolock((pfn_t)
20960Sstevel@tonic-gate 			    (aflt->flt_addr >> MMU_PAGESHIFT));
20970Sstevel@tonic-gate 
20980Sstevel@tonic-gate 			if (pp != NULL && page_isretired(pp)) {
20990Sstevel@tonic-gate 
21000Sstevel@tonic-gate 				/* Zero the address to clear the error */
21010Sstevel@tonic-gate 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
21020Sstevel@tonic-gate 				return;
21030Sstevel@tonic-gate 			}
21040Sstevel@tonic-gate 		}
21050Sstevel@tonic-gate 
21060Sstevel@tonic-gate 		/*
21070Sstevel@tonic-gate 		 * Log the UE and check for causes of this UE error that
21080Sstevel@tonic-gate 		 * don't cause a trap (Copyback error).  cpu_async_error()
21090Sstevel@tonic-gate 		 * has already checked the i/o buses for us.
21100Sstevel@tonic-gate 		 */
21110Sstevel@tonic-gate 		log_ue_err(aflt, unum);
21120Sstevel@tonic-gate 		if (aflt->flt_in_memory)
21130Sstevel@tonic-gate 			cpu_check_allcpus(aflt);
21140Sstevel@tonic-gate 		break;
21150Sstevel@tonic-gate 
21160Sstevel@tonic-gate 	    case CPU_EDP_LDP_ERR:
21170Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_EDP)
21180Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
21190Sstevel@tonic-gate 			    NULL, " EDP event on");
21200Sstevel@tonic-gate 
21210Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_LDP)
21220Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
21230Sstevel@tonic-gate 			    NULL, " LDP event on");
21240Sstevel@tonic-gate 
21250Sstevel@tonic-gate 		/* Log ecache info if exist */
21260Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0) {
21270Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
21280Sstevel@tonic-gate 
21290Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
21300Sstevel@tonic-gate 			    NULL, " AFAR was derived from E$Tag");
21310Sstevel@tonic-gate 		} else {
21320Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
21330Sstevel@tonic-gate 			    NULL, " No error found in ecache (No fault "
21340Sstevel@tonic-gate 			    "PA available)");
21350Sstevel@tonic-gate 		}
21360Sstevel@tonic-gate 		break;
21370Sstevel@tonic-gate 
21380Sstevel@tonic-gate 	    case CPU_WP_ERR:
21390Sstevel@tonic-gate 		/*
21400Sstevel@tonic-gate 		 * If the memscrub thread hasn't yet read
21410Sstevel@tonic-gate 		 * all of memory, as we requested in the
21420Sstevel@tonic-gate 		 * trap handler, then give it a kick to
21430Sstevel@tonic-gate 		 * make sure it does.
21440Sstevel@tonic-gate 		 */
21450Sstevel@tonic-gate 		if (!isus2i && !isus2e && read_all_memscrub)
21460Sstevel@tonic-gate 			memscrub_run();
21470Sstevel@tonic-gate 
21480Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
21490Sstevel@tonic-gate 		    " WP event on");
21500Sstevel@tonic-gate 		return;
21510Sstevel@tonic-gate 
21520Sstevel@tonic-gate 	    case CPU_BTO_BERR_ERR:
21530Sstevel@tonic-gate 		/*
21540Sstevel@tonic-gate 		 * A bus timeout or error occurred that was in user mode or not
21550Sstevel@tonic-gate 		 * in a protected kernel code region.
21560Sstevel@tonic-gate 		 */
21570Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_BERR) {
21580Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
21590Sstevel@tonic-gate 			    spf_flt, BERRTO_LFLAGS, NULL,
21600Sstevel@tonic-gate 			    " Bus Error on System Bus in %s mode from",
21610Sstevel@tonic-gate 			    aflt->flt_priv ? "privileged" : "user");
21620Sstevel@tonic-gate 		}
21630Sstevel@tonic-gate 
21640Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_TO) {
21650Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
21660Sstevel@tonic-gate 			    spf_flt, BERRTO_LFLAGS, NULL,
21670Sstevel@tonic-gate 			    " Timeout on System Bus in %s mode from",
21680Sstevel@tonic-gate 			    aflt->flt_priv ? "privileged" : "user");
21690Sstevel@tonic-gate 		}
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 		return;
21720Sstevel@tonic-gate 
21730Sstevel@tonic-gate 	    case CPU_PANIC_CP_ERR:
21740Sstevel@tonic-gate 		/*
21750Sstevel@tonic-gate 		 * Process the Copyback (CP) error info (if any) obtained from
21760Sstevel@tonic-gate 		 * polling all the cpus in the panic flow. This case is only
21770Sstevel@tonic-gate 		 * entered if we are panicking.
21780Sstevel@tonic-gate 		 */
21790Sstevel@tonic-gate 		ASSERT(panicstr != NULL);
21800Sstevel@tonic-gate 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 		/* See which space - this info may not exist */
21830Sstevel@tonic-gate 		if (panic_aflt.flt_status & ECC_D_TRAP)
21840Sstevel@tonic-gate 			space = "Data ";
21850Sstevel@tonic-gate 		else if (panic_aflt.flt_status & ECC_I_TRAP)
21860Sstevel@tonic-gate 			space = "Instruction ";
21870Sstevel@tonic-gate 		else
21880Sstevel@tonic-gate 			space = "";
21890Sstevel@tonic-gate 
21900Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
21910Sstevel@tonic-gate 		    " AFAR was derived from UE report,"
21920Sstevel@tonic-gate 		    " CP event on CPU%d (caused %saccess error on %s%d)",
21930Sstevel@tonic-gate 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
21940Sstevel@tonic-gate 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
21950Sstevel@tonic-gate 
21960Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0)
21970Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
21980Sstevel@tonic-gate 		else
21990Sstevel@tonic-gate 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
22000Sstevel@tonic-gate 			    NULL, " No cache dump available");
22010Sstevel@tonic-gate 
22020Sstevel@tonic-gate 		return;
22030Sstevel@tonic-gate 
22040Sstevel@tonic-gate 	    case CPU_TRAPPING_CP_ERR:
22050Sstevel@tonic-gate 		/*
22060Sstevel@tonic-gate 		 * For sabre only.  This is a copyback ecache parity error due
22070Sstevel@tonic-gate 		 * to a PCI DMA read.  We should be panicking if we get here.
22080Sstevel@tonic-gate 		 */
22090Sstevel@tonic-gate 		ASSERT(panicstr != NULL);
22100Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
22110Sstevel@tonic-gate 		    " AFAR was derived from UE report,"
22120Sstevel@tonic-gate 		    " CP event on CPU%d (caused Data access error "
22130Sstevel@tonic-gate 		    "on PCIBus)", aflt->flt_inst);
22140Sstevel@tonic-gate 		return;
22150Sstevel@tonic-gate 
22160Sstevel@tonic-gate 		/*
22170Sstevel@tonic-gate 		 * We log the ecache lines of the following states,
22180Sstevel@tonic-gate 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
22190Sstevel@tonic-gate 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
22200Sstevel@tonic-gate 		 * in addition to logging if ecache_scrub_panic is set.
22210Sstevel@tonic-gate 		 */
22220Sstevel@tonic-gate 	    case CPU_BADLINE_CI_ERR:
22230Sstevel@tonic-gate 		ecache_scrub_logstr = "CBI";
22240Sstevel@tonic-gate 		/* FALLTHRU */
22250Sstevel@tonic-gate 
22260Sstevel@tonic-gate 	    case CPU_BADLINE_CB_ERR:
22270Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
22280Sstevel@tonic-gate 			ecache_scrub_logstr = "CBB";
22290Sstevel@tonic-gate 		/* FALLTHRU */
22300Sstevel@tonic-gate 
22310Sstevel@tonic-gate 	    case CPU_BADLINE_DI_ERR:
22320Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
22330Sstevel@tonic-gate 			ecache_scrub_logstr = "DBI";
22340Sstevel@tonic-gate 		/* FALLTHRU */
22350Sstevel@tonic-gate 
22360Sstevel@tonic-gate 	    case CPU_BADLINE_DB_ERR:
22370Sstevel@tonic-gate 		if (ecache_scrub_logstr == NULL)
22380Sstevel@tonic-gate 			ecache_scrub_logstr = "DBB";
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
22410Sstevel@tonic-gate 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
22420Sstevel@tonic-gate 			" %s event on", ecache_scrub_logstr);
22430Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
22440Sstevel@tonic-gate 
22450Sstevel@tonic-gate 		return;
22460Sstevel@tonic-gate 
22470Sstevel@tonic-gate 	    case CPU_ORPHAN_CP_ERR:
22480Sstevel@tonic-gate 		/*
22490Sstevel@tonic-gate 		 * Orphan CPs, where the CP bit is set, but when a CPU
22500Sstevel@tonic-gate 		 * doesn't report a UE.
22510Sstevel@tonic-gate 		 */
22520Sstevel@tonic-gate 		if (read_all_memscrub)
22530Sstevel@tonic-gate 			memscrub_run();
22540Sstevel@tonic-gate 
22550Sstevel@tonic-gate 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
22560Sstevel@tonic-gate 			NULL, " Orphan CP event on");
22570Sstevel@tonic-gate 
22580Sstevel@tonic-gate 		/* Log ecache info if exist */
22590Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 0)
22600Sstevel@tonic-gate 			cpu_log_ecmem_info(spf_flt);
22610Sstevel@tonic-gate 		else
22620Sstevel@tonic-gate 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
22630Sstevel@tonic-gate 				(CP_LFLAGS | CPU_FLTCPU), NULL,
22640Sstevel@tonic-gate 				" No error found in ecache (No fault "
22650Sstevel@tonic-gate 				"PA available");
22660Sstevel@tonic-gate 		return;
22670Sstevel@tonic-gate 
22680Sstevel@tonic-gate 	    case CPU_ECACHE_ADDR_PAR_ERR:
22690Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
22700Sstevel@tonic-gate 				" E$ Tag Address Parity error on");
22710Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
22720Sstevel@tonic-gate 		return;
22730Sstevel@tonic-gate 
22740Sstevel@tonic-gate 	    case CPU_ECACHE_STATE_ERR:
22750Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
22760Sstevel@tonic-gate 				" E$ Tag State Parity error on");
22770Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
22780Sstevel@tonic-gate 		return;
22790Sstevel@tonic-gate 
22800Sstevel@tonic-gate 	    case CPU_ECACHE_TAG_ERR:
22810Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
22820Sstevel@tonic-gate 				" E$ Tag scrub event on");
22830Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
22840Sstevel@tonic-gate 		return;
22850Sstevel@tonic-gate 
22860Sstevel@tonic-gate 	    case CPU_ECACHE_ETP_ETS_ERR:
22870Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
22880Sstevel@tonic-gate 				" AFSR.ETP is set and AFSR.ETS is zero on");
22890Sstevel@tonic-gate 		cpu_log_ecmem_info(spf_flt);
22900Sstevel@tonic-gate 		return;
22910Sstevel@tonic-gate 
22920Sstevel@tonic-gate 
22930Sstevel@tonic-gate 	    case CPU_ADDITIONAL_ERR:
22940Sstevel@tonic-gate 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
22950Sstevel@tonic-gate 		    " Additional errors detected during error processing on");
22960Sstevel@tonic-gate 		return;
22970Sstevel@tonic-gate 
22980Sstevel@tonic-gate 	    default:
22990Sstevel@tonic-gate 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
23000Sstevel@tonic-gate 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
23010Sstevel@tonic-gate 		return;
23020Sstevel@tonic-gate 	}
23030Sstevel@tonic-gate 
23040Sstevel@tonic-gate 	/* ... fall through from the UE, EDP, or LDP cases */
23050Sstevel@tonic-gate 
23060Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
23070Sstevel@tonic-gate 		if (!panicstr) {
23080Sstevel@tonic-gate 			/*
23090Sstevel@tonic-gate 			 * Retire the bad page that caused the error
23100Sstevel@tonic-gate 			 */
23110Sstevel@tonic-gate 			page_t *pp = page_numtopp_nolock((pfn_t)
23120Sstevel@tonic-gate 			    (aflt->flt_addr >> MMU_PAGESHIFT));
23130Sstevel@tonic-gate 
23140Sstevel@tonic-gate 			if (pp != NULL) {
23150Sstevel@tonic-gate 				page_settoxic(pp, PAGE_IS_FAULTY);
23160Sstevel@tonic-gate 				(void) page_retire(pp, PAGE_IS_TOXIC);
23170Sstevel@tonic-gate 			} else {
23180Sstevel@tonic-gate 				uint64_t pa =
23190Sstevel@tonic-gate 				    P2ALIGN(aflt->flt_addr, MMU_PAGESIZE);
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 3, spf_flt,
23220Sstevel@tonic-gate 				    CPU_ERRID_FIRST, NULL,
23230Sstevel@tonic-gate 				    ": cannot schedule clearing of error on "
23240Sstevel@tonic-gate 				    "page 0x%08x.%08x; page not in VM system",
23250Sstevel@tonic-gate 				    (uint32_t)(pa >> 32), (uint32_t)pa);
23260Sstevel@tonic-gate 			}
23270Sstevel@tonic-gate 		} else {
23280Sstevel@tonic-gate 			/*
23290Sstevel@tonic-gate 			 * Clear UEs on panic so that we don't
23300Sstevel@tonic-gate 			 * get haunted by them during panic or
23310Sstevel@tonic-gate 			 * after reboot
23320Sstevel@tonic-gate 			 */
23330Sstevel@tonic-gate 			clearphys(P2ALIGN(aflt->flt_addr, 64),
23340Sstevel@tonic-gate 			    cpunodes[CPU->cpu_id].ecache_size,
23350Sstevel@tonic-gate 			    cpunodes[CPU->cpu_id].ecache_linesize);
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate 			(void) clear_errors(NULL, NULL);
23380Sstevel@tonic-gate 		}
23390Sstevel@tonic-gate 	}
23400Sstevel@tonic-gate 
23410Sstevel@tonic-gate 	/*
23420Sstevel@tonic-gate 	 * Log final recover message
23430Sstevel@tonic-gate 	 */
23440Sstevel@tonic-gate 	if (!panicstr) {
23450Sstevel@tonic-gate 		if (!aflt->flt_priv) {
23460Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
23470Sstevel@tonic-gate 			    NULL, " Above Error is in User Mode"
23480Sstevel@tonic-gate 			    "\n    and is fatal: "
23490Sstevel@tonic-gate 			    "will SIGKILL process and notify contract");
23500Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
23510Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
23520Sstevel@tonic-gate 			    NULL, " Above Error detected while dumping core;"
23530Sstevel@tonic-gate 			    "\n    core file will be truncated");
23540Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
23550Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
23560Sstevel@tonic-gate 			    NULL, " Above Error is due to Kernel access"
23570Sstevel@tonic-gate 			    "\n    to User space and is fatal: "
23580Sstevel@tonic-gate 			    "will SIGKILL process and notify contract");
23590Sstevel@tonic-gate 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
23600Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
23610Sstevel@tonic-gate 			    " Above Error detected by protected Kernel code"
23620Sstevel@tonic-gate 			    "\n    that will try to clear error from system");
23630Sstevel@tonic-gate 		}
23640Sstevel@tonic-gate 	}
23650Sstevel@tonic-gate }
23660Sstevel@tonic-gate 
23670Sstevel@tonic-gate 
23680Sstevel@tonic-gate /*
23690Sstevel@tonic-gate  * Check all cpus for non-trapping UE-causing errors
23700Sstevel@tonic-gate  * In Ultra I/II, we look for copyback errors (CPs)
23710Sstevel@tonic-gate  */
23720Sstevel@tonic-gate void
23730Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt)
23740Sstevel@tonic-gate {
23750Sstevel@tonic-gate 	spitf_async_flt cp;
23760Sstevel@tonic-gate 	spitf_async_flt *spf_cpflt = &cp;
23770Sstevel@tonic-gate 	struct async_flt *cpflt = (struct async_flt *)&cp;
23780Sstevel@tonic-gate 	int pix;
23790Sstevel@tonic-gate 
23800Sstevel@tonic-gate 	cpflt->flt_id = aflt->flt_id;
23810Sstevel@tonic-gate 	cpflt->flt_addr = aflt->flt_addr;
23820Sstevel@tonic-gate 
23830Sstevel@tonic-gate 	for (pix = 0; pix < NCPU; pix++) {
23840Sstevel@tonic-gate 		if (CPU_XCALL_READY(pix)) {
23850Sstevel@tonic-gate 			xc_one(pix, (xcfunc_t *)get_cpu_status,
23860Sstevel@tonic-gate 			    (uint64_t)cpflt, 0);
23870Sstevel@tonic-gate 
23880Sstevel@tonic-gate 			if (cpflt->flt_stat & P_AFSR_CP) {
23890Sstevel@tonic-gate 				char *space;
23900Sstevel@tonic-gate 
23910Sstevel@tonic-gate 				/* See which space - this info may not exist */
23920Sstevel@tonic-gate 				if (aflt->flt_status & ECC_D_TRAP)
23930Sstevel@tonic-gate 					space = "Data ";
23940Sstevel@tonic-gate 				else if (aflt->flt_status & ECC_I_TRAP)
23950Sstevel@tonic-gate 					space = "Instruction ";
23960Sstevel@tonic-gate 				else
23970Sstevel@tonic-gate 					space = "";
23980Sstevel@tonic-gate 
23990Sstevel@tonic-gate 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
24000Sstevel@tonic-gate 				    NULL, " AFAR was derived from UE report,"
24010Sstevel@tonic-gate 				    " CP event on CPU%d (caused %saccess "
24020Sstevel@tonic-gate 				    "error on %s%d)", pix, space,
24030Sstevel@tonic-gate 				    (aflt->flt_status & ECC_IOBUS) ?
24040Sstevel@tonic-gate 				    "IOBUS" : "CPU", aflt->flt_bus_id);
24050Sstevel@tonic-gate 
24060Sstevel@tonic-gate 				if (spf_cpflt->flt_ec_lcnt > 0)
24070Sstevel@tonic-gate 					cpu_log_ecmem_info(spf_cpflt);
24080Sstevel@tonic-gate 				else
24090Sstevel@tonic-gate 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
24100Sstevel@tonic-gate 					    CPU_ERRID_FIRST, NULL,
24110Sstevel@tonic-gate 					    " No cache dump available");
24120Sstevel@tonic-gate 			}
24130Sstevel@tonic-gate 		}
24140Sstevel@tonic-gate 	}
24150Sstevel@tonic-gate }
24160Sstevel@tonic-gate 
24170Sstevel@tonic-gate #ifdef DEBUG
24180Sstevel@tonic-gate int test_mp_cp = 0;
24190Sstevel@tonic-gate #endif
24200Sstevel@tonic-gate 
24210Sstevel@tonic-gate /*
24220Sstevel@tonic-gate  * Cross-call callback routine to tell a CPU to read its own %afsr to check
24230Sstevel@tonic-gate  * for copyback errors and capture relevant information.
24240Sstevel@tonic-gate  */
24250Sstevel@tonic-gate static uint_t
24260Sstevel@tonic-gate get_cpu_status(uint64_t arg)
24270Sstevel@tonic-gate {
24280Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)arg;
24290Sstevel@tonic-gate 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
24300Sstevel@tonic-gate 	uint64_t afsr;
24310Sstevel@tonic-gate 	uint32_t ec_idx;
24320Sstevel@tonic-gate 	uint64_t sdbh, sdbl;
24330Sstevel@tonic-gate 	int i;
24340Sstevel@tonic-gate 	uint32_t ec_set_size;
24350Sstevel@tonic-gate 	uchar_t valid;
24360Sstevel@tonic-gate 	ec_data_t ec_data[8];
24370Sstevel@tonic-gate 	uint64_t ec_tag, flt_addr_tag, oafsr;
24380Sstevel@tonic-gate 	uint64_t *acc_afsr = NULL;
24390Sstevel@tonic-gate 
24400Sstevel@tonic-gate 	get_asyncflt(&afsr);
24410Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) != NULL) {
24420Sstevel@tonic-gate 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
24430Sstevel@tonic-gate 		afsr |= *acc_afsr;
24440Sstevel@tonic-gate 		*acc_afsr = 0;
24450Sstevel@tonic-gate 	}
24460Sstevel@tonic-gate 
24470Sstevel@tonic-gate #ifdef DEBUG
24480Sstevel@tonic-gate 	if (test_mp_cp)
24490Sstevel@tonic-gate 		afsr |= P_AFSR_CP;
24500Sstevel@tonic-gate #endif
24510Sstevel@tonic-gate 	aflt->flt_stat = afsr;
24520Sstevel@tonic-gate 
24530Sstevel@tonic-gate 	if (afsr & P_AFSR_CP) {
24540Sstevel@tonic-gate 		/*
24550Sstevel@tonic-gate 		 * Capture the UDBs
24560Sstevel@tonic-gate 		 */
24570Sstevel@tonic-gate 		get_udb_errors(&sdbh, &sdbl);
24580Sstevel@tonic-gate 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
24590Sstevel@tonic-gate 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
24600Sstevel@tonic-gate 
24610Sstevel@tonic-gate 		/*
24620Sstevel@tonic-gate 		 * Clear CP bit before capturing ecache data
24630Sstevel@tonic-gate 		 * and AFSR info.
24640Sstevel@tonic-gate 		 */
24650Sstevel@tonic-gate 		set_asyncflt(P_AFSR_CP);
24660Sstevel@tonic-gate 
24670Sstevel@tonic-gate 		/*
24680Sstevel@tonic-gate 		 * See if we can capture the ecache line for the
24690Sstevel@tonic-gate 		 * fault PA.
24700Sstevel@tonic-gate 		 *
24710Sstevel@tonic-gate 		 * Return a valid matching ecache line, if any.
24720Sstevel@tonic-gate 		 * Otherwise, return the first matching ecache
24730Sstevel@tonic-gate 		 * line marked invalid.
24740Sstevel@tonic-gate 		 */
24750Sstevel@tonic-gate 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
24760Sstevel@tonic-gate 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
24770Sstevel@tonic-gate 		    ecache_associativity;
24780Sstevel@tonic-gate 		spf_flt->flt_ec_lcnt = 0;
24790Sstevel@tonic-gate 
24800Sstevel@tonic-gate 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
24810Sstevel@tonic-gate 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
24820Sstevel@tonic-gate 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
24830Sstevel@tonic-gate 				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
24840Sstevel@tonic-gate 				    acc_afsr);
24850Sstevel@tonic-gate 
24860Sstevel@tonic-gate 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
24870Sstevel@tonic-gate 				continue;
24880Sstevel@tonic-gate 
24890Sstevel@tonic-gate 			valid = cpu_ec_state_valid &
24900Sstevel@tonic-gate 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
24910Sstevel@tonic-gate 			    cpu_ec_state_shift);
24920Sstevel@tonic-gate 
24930Sstevel@tonic-gate 			if (valid || spf_flt->flt_ec_lcnt == 0) {
24940Sstevel@tonic-gate 				spf_flt->flt_ec_tag = ec_tag;
24950Sstevel@tonic-gate 				bcopy(&ec_data, &spf_flt->flt_ec_data,
24960Sstevel@tonic-gate 				    sizeof (ec_data));
24970Sstevel@tonic-gate 				spf_flt->flt_ec_lcnt = 1;
24980Sstevel@tonic-gate 
24990Sstevel@tonic-gate 				if (valid)
25000Sstevel@tonic-gate 					break;
25010Sstevel@tonic-gate 			}
25020Sstevel@tonic-gate 		}
25030Sstevel@tonic-gate 	}
25040Sstevel@tonic-gate 	return (0);
25050Sstevel@tonic-gate }
25060Sstevel@tonic-gate 
25070Sstevel@tonic-gate /*
25080Sstevel@tonic-gate  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
25090Sstevel@tonic-gate  * from panic_idle() as part of the other CPUs stopping themselves when a
25100Sstevel@tonic-gate  * panic occurs.  We need to be VERY careful what we do here, since panicstr
25110Sstevel@tonic-gate  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
25120Sstevel@tonic-gate  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
25130Sstevel@tonic-gate  * CP error information.
25140Sstevel@tonic-gate  */
25150Sstevel@tonic-gate void
25160Sstevel@tonic-gate cpu_async_panic_callb(void)
25170Sstevel@tonic-gate {
25180Sstevel@tonic-gate 	spitf_async_flt cp;
25190Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)&cp;
25200Sstevel@tonic-gate 	uint64_t *scrub_afsr;
25210Sstevel@tonic-gate 
25220Sstevel@tonic-gate 	if (panic_aflt.flt_id != 0) {
25230Sstevel@tonic-gate 		aflt->flt_addr = panic_aflt.flt_addr;
25240Sstevel@tonic-gate 		(void) get_cpu_status((uint64_t)aflt);
25250Sstevel@tonic-gate 
25260Sstevel@tonic-gate 		if (CPU_PRIVATE(CPU) != NULL) {
25270Sstevel@tonic-gate 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
25280Sstevel@tonic-gate 			if (*scrub_afsr & P_AFSR_CP) {
25290Sstevel@tonic-gate 				aflt->flt_stat |= *scrub_afsr;
25300Sstevel@tonic-gate 				*scrub_afsr = 0;
25310Sstevel@tonic-gate 			}
25320Sstevel@tonic-gate 		}
25330Sstevel@tonic-gate 		if (aflt->flt_stat & P_AFSR_CP) {
25340Sstevel@tonic-gate 			aflt->flt_id = panic_aflt.flt_id;
25350Sstevel@tonic-gate 			aflt->flt_panic = 1;
25360Sstevel@tonic-gate 			aflt->flt_inst = CPU->cpu_id;
25370Sstevel@tonic-gate 			aflt->flt_class = CPU_FAULT;
25380Sstevel@tonic-gate 			cp.flt_type = CPU_PANIC_CP_ERR;
25390Sstevel@tonic-gate 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
25400Sstevel@tonic-gate 			    (void *)&cp, sizeof (cp), ue_queue,
25410Sstevel@tonic-gate 			    aflt->flt_panic);
25420Sstevel@tonic-gate 		}
25430Sstevel@tonic-gate 	}
25440Sstevel@tonic-gate }
25450Sstevel@tonic-gate 
25460Sstevel@tonic-gate /*
25470Sstevel@tonic-gate  * Turn off all cpu error detection, normally only used for panics.
25480Sstevel@tonic-gate  */
25490Sstevel@tonic-gate void
25500Sstevel@tonic-gate cpu_disable_errors(void)
25510Sstevel@tonic-gate {
25520Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
25530Sstevel@tonic-gate }
25540Sstevel@tonic-gate 
25550Sstevel@tonic-gate /*
25560Sstevel@tonic-gate  * Enable errors.
25570Sstevel@tonic-gate  */
25580Sstevel@tonic-gate void
25590Sstevel@tonic-gate cpu_enable_errors(void)
25600Sstevel@tonic-gate {
25610Sstevel@tonic-gate 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
25620Sstevel@tonic-gate }
25630Sstevel@tonic-gate 
25640Sstevel@tonic-gate static void
25650Sstevel@tonic-gate cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
25660Sstevel@tonic-gate {
25670Sstevel@tonic-gate 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
25680Sstevel@tonic-gate 	int i, loop = 1;
25690Sstevel@tonic-gate 	ushort_t ecc_0;
25700Sstevel@tonic-gate 	uint64_t paddr;
25710Sstevel@tonic-gate 	uint64_t data;
25720Sstevel@tonic-gate 
25730Sstevel@tonic-gate 	if (verbose)
25740Sstevel@tonic-gate 		loop = 8;
25750Sstevel@tonic-gate 	for (i = 0; i < loop; i++) {
25760Sstevel@tonic-gate 		paddr = aligned_addr + (i * 8);
25770Sstevel@tonic-gate 		data = lddphys(paddr);
25780Sstevel@tonic-gate 		if (verbose) {
25790Sstevel@tonic-gate 			if (ce_err) {
25800Sstevel@tonic-gate 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
25810Sstevel@tonic-gate 			    (uint32_t)data);
25820Sstevel@tonic-gate 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
25830Sstevel@tonic-gate 				NULL, "    Paddr 0x%" PRIx64 ", "
25840Sstevel@tonic-gate 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
25850Sstevel@tonic-gate 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
25860Sstevel@tonic-gate 			} else {
25870Sstevel@tonic-gate 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
25880Sstevel@tonic-gate 				    NULL, "    Paddr 0x%" PRIx64 ", "
25890Sstevel@tonic-gate 				    "Data 0x%08x.%08x", paddr,
25900Sstevel@tonic-gate 				    (uint32_t)(data>>32), (uint32_t)data);
25910Sstevel@tonic-gate 			}
25920Sstevel@tonic-gate 		}
25930Sstevel@tonic-gate 	}
25940Sstevel@tonic-gate }
25950Sstevel@tonic-gate 
25960Sstevel@tonic-gate static struct {		/* sec-ded-s4ed ecc code */
25970Sstevel@tonic-gate 	uint_t hi, lo;
25980Sstevel@tonic-gate } ecc_code[8] = {
25990Sstevel@tonic-gate 	{ 0xee55de23U, 0x16161161U },
26000Sstevel@tonic-gate 	{ 0x55eede93U, 0x61612212U },
26010Sstevel@tonic-gate 	{ 0xbb557b8cU, 0x49494494U },
26020Sstevel@tonic-gate 	{ 0x55bb7b6cU, 0x94948848U },
26030Sstevel@tonic-gate 	{ 0x16161161U, 0xee55de23U },
26040Sstevel@tonic-gate 	{ 0x61612212U, 0x55eede93U },
26050Sstevel@tonic-gate 	{ 0x49494494U, 0xbb557b8cU },
26060Sstevel@tonic-gate 	{ 0x94948848U, 0x55bb7b6cU }
26070Sstevel@tonic-gate };
26080Sstevel@tonic-gate 
26090Sstevel@tonic-gate static ushort_t
26100Sstevel@tonic-gate ecc_gen(uint_t high_bytes, uint_t low_bytes)
26110Sstevel@tonic-gate {
26120Sstevel@tonic-gate 	int i, j;
26130Sstevel@tonic-gate 	uchar_t checker, bit_mask;
26140Sstevel@tonic-gate 	struct {
26150Sstevel@tonic-gate 		uint_t hi, lo;
26160Sstevel@tonic-gate 	} hex_data, masked_data[8];
26170Sstevel@tonic-gate 
26180Sstevel@tonic-gate 	hex_data.hi = high_bytes;
26190Sstevel@tonic-gate 	hex_data.lo = low_bytes;
26200Sstevel@tonic-gate 
26210Sstevel@tonic-gate 	/* mask out bits according to sec-ded-s4ed ecc code */
26220Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
26230Sstevel@tonic-gate 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
26240Sstevel@tonic-gate 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
26250Sstevel@tonic-gate 	}
26260Sstevel@tonic-gate 
26270Sstevel@tonic-gate 	/*
26280Sstevel@tonic-gate 	 * xor all bits in masked_data[i] to get bit_i of checker,
26290Sstevel@tonic-gate 	 * where i = 0 to 7
26300Sstevel@tonic-gate 	 */
26310Sstevel@tonic-gate 	checker = 0;
26320Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
26330Sstevel@tonic-gate 		bit_mask = 1 << i;
26340Sstevel@tonic-gate 		for (j = 0; j < 32; j++) {
26350Sstevel@tonic-gate 			if (masked_data[i].lo & 1) checker ^= bit_mask;
26360Sstevel@tonic-gate 			if (masked_data[i].hi & 1) checker ^= bit_mask;
26370Sstevel@tonic-gate 			masked_data[i].hi >>= 1;
26380Sstevel@tonic-gate 			masked_data[i].lo >>= 1;
26390Sstevel@tonic-gate 		}
26400Sstevel@tonic-gate 	}
26410Sstevel@tonic-gate 	return (checker);
26420Sstevel@tonic-gate }
26430Sstevel@tonic-gate 
26440Sstevel@tonic-gate /*
26450Sstevel@tonic-gate  * Flush the entire ecache using displacement flush by reading through a
26460Sstevel@tonic-gate  * physical address range as large as the ecache.
26470Sstevel@tonic-gate  */
26480Sstevel@tonic-gate void
26490Sstevel@tonic-gate cpu_flush_ecache(void)
26500Sstevel@tonic-gate {
26510Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
26520Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
26530Sstevel@tonic-gate }
26540Sstevel@tonic-gate 
26550Sstevel@tonic-gate /*
26560Sstevel@tonic-gate  * read and display the data in the cache line where the
26570Sstevel@tonic-gate  * original ce error occurred.
26580Sstevel@tonic-gate  * This routine is mainly used for debugging new hardware.
26590Sstevel@tonic-gate  */
26600Sstevel@tonic-gate void
26610Sstevel@tonic-gate read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
26620Sstevel@tonic-gate {
26630Sstevel@tonic-gate 	kpreempt_disable();
26640Sstevel@tonic-gate 	/* disable ECC error traps */
26650Sstevel@tonic-gate 	set_error_enable(EER_ECC_DISABLE);
26660Sstevel@tonic-gate 
26670Sstevel@tonic-gate 	/*
26680Sstevel@tonic-gate 	 * flush the ecache
26690Sstevel@tonic-gate 	 * read the data
26700Sstevel@tonic-gate 	 * check to see if an ECC error occured
26710Sstevel@tonic-gate 	 */
26720Sstevel@tonic-gate 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
26730Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].ecache_linesize);
26740Sstevel@tonic-gate 	set_lsu(get_lsu() | cache_boot_state);
26750Sstevel@tonic-gate 	cpu_read_paddr(ecc, verbose, ce_err);
26760Sstevel@tonic-gate 	(void) check_ecc(ecc);
26770Sstevel@tonic-gate 
26780Sstevel@tonic-gate 	/* enable ECC error traps */
26790Sstevel@tonic-gate 	set_error_enable(EER_ENABLE);
26800Sstevel@tonic-gate 	kpreempt_enable();
26810Sstevel@tonic-gate }
26820Sstevel@tonic-gate 
26830Sstevel@tonic-gate /*
26840Sstevel@tonic-gate  * Check the AFSR bits for UE/CE persistence.
26850Sstevel@tonic-gate  * If UE or CE errors are detected, the routine will
26860Sstevel@tonic-gate  * clears all the AFSR sticky bits (except CP for
26870Sstevel@tonic-gate  * spitfire/blackbird) and the UDBs.
26880Sstevel@tonic-gate  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
26890Sstevel@tonic-gate  */
26900Sstevel@tonic-gate static int
26910Sstevel@tonic-gate check_ecc(struct async_flt *ecc)
26920Sstevel@tonic-gate {
26930Sstevel@tonic-gate 	uint64_t t_afsr;
26940Sstevel@tonic-gate 	uint64_t t_afar;
26950Sstevel@tonic-gate 	uint64_t udbh;
26960Sstevel@tonic-gate 	uint64_t udbl;
26970Sstevel@tonic-gate 	ushort_t udb;
26980Sstevel@tonic-gate 	int persistent = 0;
26990Sstevel@tonic-gate 
27000Sstevel@tonic-gate 	/*
27010Sstevel@tonic-gate 	 * Capture the AFSR, AFAR and UDBs info
27020Sstevel@tonic-gate 	 */
27030Sstevel@tonic-gate 	get_asyncflt(&t_afsr);
27040Sstevel@tonic-gate 	get_asyncaddr(&t_afar);
27050Sstevel@tonic-gate 	t_afar &= SABRE_AFAR_PA;
27060Sstevel@tonic-gate 	get_udb_errors(&udbh, &udbl);
27070Sstevel@tonic-gate 
27080Sstevel@tonic-gate 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
27090Sstevel@tonic-gate 		/*
27100Sstevel@tonic-gate 		 * Clear the errors
27110Sstevel@tonic-gate 		 */
27120Sstevel@tonic-gate 		clr_datapath();
27130Sstevel@tonic-gate 
27140Sstevel@tonic-gate 		if (isus2i || isus2e)
27150Sstevel@tonic-gate 			set_asyncflt(t_afsr);
27160Sstevel@tonic-gate 		else
27170Sstevel@tonic-gate 			set_asyncflt(t_afsr & ~P_AFSR_CP);
27180Sstevel@tonic-gate 
27190Sstevel@tonic-gate 		/*
27200Sstevel@tonic-gate 		 * determine whether to check UDBH or UDBL for persistence
27210Sstevel@tonic-gate 		 */
27220Sstevel@tonic-gate 		if (ecc->flt_synd & UDBL_REG) {
27230Sstevel@tonic-gate 			udb = (ushort_t)udbl;
27240Sstevel@tonic-gate 			t_afar |= 0x8;
27250Sstevel@tonic-gate 		} else {
27260Sstevel@tonic-gate 			udb = (ushort_t)udbh;
27270Sstevel@tonic-gate 		}
27280Sstevel@tonic-gate 
27290Sstevel@tonic-gate 		if (ce_debug || ue_debug) {
27300Sstevel@tonic-gate 			spitf_async_flt spf_flt; /* for logging */
27310Sstevel@tonic-gate 			struct async_flt *aflt =
27320Sstevel@tonic-gate 				(struct async_flt *)&spf_flt;
27330Sstevel@tonic-gate 
27340Sstevel@tonic-gate 			/* Package the info nicely in the spf_flt struct */
27350Sstevel@tonic-gate 			bzero(&spf_flt, sizeof (spitf_async_flt));
27360Sstevel@tonic-gate 			aflt->flt_stat = t_afsr;
27370Sstevel@tonic-gate 			aflt->flt_addr = t_afar;
27380Sstevel@tonic-gate 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
27390Sstevel@tonic-gate 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
27400Sstevel@tonic-gate 
27410Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
27420Sstevel@tonic-gate 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
27430Sstevel@tonic-gate 			    " check_ecc: Dumping captured error states ...");
27440Sstevel@tonic-gate 		}
27450Sstevel@tonic-gate 
27460Sstevel@tonic-gate 		/*
27470Sstevel@tonic-gate 		 * if the fault addresses don't match, not persistent
27480Sstevel@tonic-gate 		 */
27490Sstevel@tonic-gate 		if (t_afar != ecc->flt_addr) {
27500Sstevel@tonic-gate 			return (persistent);
27510Sstevel@tonic-gate 		}
27520Sstevel@tonic-gate 
27530Sstevel@tonic-gate 		/*
27540Sstevel@tonic-gate 		 * check for UE persistence
27550Sstevel@tonic-gate 		 * since all DIMMs in the bank are identified for a UE,
27560Sstevel@tonic-gate 		 * there's no reason to check the syndrome
27570Sstevel@tonic-gate 		 */
27580Sstevel@tonic-gate 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
27590Sstevel@tonic-gate 			persistent = 1;
27600Sstevel@tonic-gate 		}
27610Sstevel@tonic-gate 
27620Sstevel@tonic-gate 		/*
27630Sstevel@tonic-gate 		 * check for CE persistence
27640Sstevel@tonic-gate 		 */
27650Sstevel@tonic-gate 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
27660Sstevel@tonic-gate 			if ((udb & P_DER_E_SYND) ==
27670Sstevel@tonic-gate 			    (ecc->flt_synd & P_DER_E_SYND)) {
27680Sstevel@tonic-gate 				persistent = 1;
27690Sstevel@tonic-gate 			}
27700Sstevel@tonic-gate 		}
27710Sstevel@tonic-gate 	}
27720Sstevel@tonic-gate 	return (persistent);
27730Sstevel@tonic-gate }
27740Sstevel@tonic-gate 
27750Sstevel@tonic-gate #ifdef HUMMINGBIRD
27760Sstevel@tonic-gate #define	HB_FULL_DIV		1
27770Sstevel@tonic-gate #define	HB_HALF_DIV		2
27780Sstevel@tonic-gate #define	HB_LOWEST_DIV		8
27790Sstevel@tonic-gate #define	HB_ECLK_INVALID		0xdeadbad
27800Sstevel@tonic-gate static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
27810Sstevel@tonic-gate 	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
27820Sstevel@tonic-gate 	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
27830Sstevel@tonic-gate 	HB_ECLK_8 };
27840Sstevel@tonic-gate 
27850Sstevel@tonic-gate #define	HB_SLOW_DOWN		0
27860Sstevel@tonic-gate #define	HB_SPEED_UP		1
27870Sstevel@tonic-gate 
27880Sstevel@tonic-gate #define	SET_ESTAR_MODE(mode)					\
27890Sstevel@tonic-gate 	stdphysio(HB_ESTAR_MODE, (mode));			\
27900Sstevel@tonic-gate 	/*							\
27910Sstevel@tonic-gate 	 * PLL logic requires minimum of 16 clock		\
27920Sstevel@tonic-gate 	 * cycles to lock to the new clock speed.		\
27930Sstevel@tonic-gate 	 * Wait 1 usec to satisfy this requirement.		\
27940Sstevel@tonic-gate 	 */							\
27950Sstevel@tonic-gate 	drv_usecwait(1);
27960Sstevel@tonic-gate 
27970Sstevel@tonic-gate #define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
27980Sstevel@tonic-gate {								\
27990Sstevel@tonic-gate 	volatile uint64_t data;					\
28000Sstevel@tonic-gate 	uint64_t count, new_count;				\
28010Sstevel@tonic-gate 	clock_t delay;						\
28020Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
28030Sstevel@tonic-gate 	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
28040Sstevel@tonic-gate 	    HB_REFRESH_COUNT_SHIFT;				\
28050Sstevel@tonic-gate 	new_count = (HB_REFRESH_INTERVAL *			\
28060Sstevel@tonic-gate 	    cpunodes[CPU->cpu_id].clock_freq) /			\
28070Sstevel@tonic-gate 	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
28080Sstevel@tonic-gate 	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
28090Sstevel@tonic-gate 	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
28100Sstevel@tonic-gate 	stdphysio(HB_MEM_CNTRL0, data);				\
28110Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);        		\
28120Sstevel@tonic-gate 	/*							\
28130Sstevel@tonic-gate 	 * If we are slowing down the cpu and Memory		\
28140Sstevel@tonic-gate 	 * Self Refresh is not enabled, it is required		\
28150Sstevel@tonic-gate 	 * to wait for old refresh count to count-down and	\
28160Sstevel@tonic-gate 	 * new refresh count to go into effect (let new value	\
28170Sstevel@tonic-gate 	 * counts down once).					\
28180Sstevel@tonic-gate 	 */							\
28190Sstevel@tonic-gate 	if ((direction) == HB_SLOW_DOWN &&			\
28200Sstevel@tonic-gate 	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
28210Sstevel@tonic-gate 		/*						\
28220Sstevel@tonic-gate 		 * Each count takes 64 cpu clock cycles		\
28230Sstevel@tonic-gate 		 * to decrement.  Wait for current refresh	\
28240Sstevel@tonic-gate 		 * count plus new refresh count at current	\
28250Sstevel@tonic-gate 		 * cpu speed to count down to zero.  Round	\
28260Sstevel@tonic-gate 		 * up the delay time.				\
28270Sstevel@tonic-gate 		 */						\
28280Sstevel@tonic-gate 		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
28290Sstevel@tonic-gate 		    (count + new_count) * MICROSEC * (cur_div)) /\
28300Sstevel@tonic-gate 		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
28310Sstevel@tonic-gate 		drv_usecwait(delay);				\
28320Sstevel@tonic-gate 	}							\
28330Sstevel@tonic-gate }
28340Sstevel@tonic-gate 
28350Sstevel@tonic-gate #define	SET_SELF_REFRESH(bit)					\
28360Sstevel@tonic-gate {								\
28370Sstevel@tonic-gate 	volatile uint64_t data;					\
28380Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
28390Sstevel@tonic-gate 	data = (data & ~HB_SELF_REFRESH_MASK) |			\
28400Sstevel@tonic-gate 	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
28410Sstevel@tonic-gate 	stdphysio(HB_MEM_CNTRL0, data);				\
28420Sstevel@tonic-gate 	data = lddphysio(HB_MEM_CNTRL0);			\
28430Sstevel@tonic-gate }
28440Sstevel@tonic-gate #endif	/* HUMMINGBIRD */
28450Sstevel@tonic-gate 
28460Sstevel@tonic-gate /* ARGSUSED */
28470Sstevel@tonic-gate void
28480Sstevel@tonic-gate cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
28490Sstevel@tonic-gate {
28500Sstevel@tonic-gate #ifdef HUMMINGBIRD
28510Sstevel@tonic-gate 	uint64_t cur_mask, cur_divisor = 0;
28520Sstevel@tonic-gate 	volatile uint64_t reg;
28530Sstevel@tonic-gate 	int index;
28540Sstevel@tonic-gate 
28550Sstevel@tonic-gate 	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
28560Sstevel@tonic-gate 	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
28570Sstevel@tonic-gate 		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
28580Sstevel@tonic-gate 		    new_divisor);
28590Sstevel@tonic-gate 		return;
28600Sstevel@tonic-gate 	}
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	reg = lddphysio(HB_ESTAR_MODE);
28630Sstevel@tonic-gate 	cur_mask = reg & HB_ECLK_MASK;
28640Sstevel@tonic-gate 	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
28650Sstevel@tonic-gate 		if (hb_eclk[index] == cur_mask) {
28660Sstevel@tonic-gate 			cur_divisor = index;
28670Sstevel@tonic-gate 			break;
28680Sstevel@tonic-gate 		}
28690Sstevel@tonic-gate 	}
28700Sstevel@tonic-gate 
28710Sstevel@tonic-gate 	if (cur_divisor == 0)
28720Sstevel@tonic-gate 		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
28730Sstevel@tonic-gate 		    "can't be determined!");
28740Sstevel@tonic-gate 
28750Sstevel@tonic-gate 	/*
28760Sstevel@tonic-gate 	 * If we are already at the requested divisor speed, just
28770Sstevel@tonic-gate 	 * return.
28780Sstevel@tonic-gate 	 */
28790Sstevel@tonic-gate 	if (cur_divisor == new_divisor)
28800Sstevel@tonic-gate 		return;
28810Sstevel@tonic-gate 
28820Sstevel@tonic-gate 	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
28830Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
28840Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
28850Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
28860Sstevel@tonic-gate 
28870Sstevel@tonic-gate 	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
28880Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
28890Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
28900Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
28910Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
28920Sstevel@tonic-gate 
28930Sstevel@tonic-gate 	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
28940Sstevel@tonic-gate 		/*
28950Sstevel@tonic-gate 		 * Transition to 1/2 speed first, then to
28960Sstevel@tonic-gate 		 * lower speed.
28970Sstevel@tonic-gate 		 */
28980Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
28990Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
29000Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
29010Sstevel@tonic-gate 
29020Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
29030Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
29040Sstevel@tonic-gate 
29050Sstevel@tonic-gate 	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
29060Sstevel@tonic-gate 		/*
29070Sstevel@tonic-gate 		 * Transition to 1/2 speed first, then to
29080Sstevel@tonic-gate 		 * full speed.
29090Sstevel@tonic-gate 		 */
29100Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
29110Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
29120Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
29130Sstevel@tonic-gate 
29140Sstevel@tonic-gate 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
29150Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
29160Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
29170Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
29180Sstevel@tonic-gate 
29190Sstevel@tonic-gate 	} else if (cur_divisor < new_divisor) {
29200Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
29210Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
29220Sstevel@tonic-gate 
29230Sstevel@tonic-gate 	} else if (cur_divisor > new_divisor) {
29240Sstevel@tonic-gate 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
29250Sstevel@tonic-gate 		/* LINTED: E_FALSE_LOGICAL_EXPR */
29260Sstevel@tonic-gate 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
29270Sstevel@tonic-gate 	}
29280Sstevel@tonic-gate 	CPU->cpu_m.divisor = (uchar_t)new_divisor;
29290Sstevel@tonic-gate #endif
29300Sstevel@tonic-gate }
29310Sstevel@tonic-gate 
29320Sstevel@tonic-gate /*
29330Sstevel@tonic-gate  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
29340Sstevel@tonic-gate  * we clear all the sticky bits. If a non-null pointer to a async fault
29350Sstevel@tonic-gate  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
29360Sstevel@tonic-gate  * info will be returned in the structure.  If a non-null pointer to a
29370Sstevel@tonic-gate  * uint64_t is passed in, this will be updated if the CP bit is set in the
29380Sstevel@tonic-gate  * AFSR.  The afsr will be returned.
29390Sstevel@tonic-gate  */
29400Sstevel@tonic-gate static uint64_t
29410Sstevel@tonic-gate clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
29420Sstevel@tonic-gate {
29430Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
29440Sstevel@tonic-gate 	uint64_t afsr;
29450Sstevel@tonic-gate 	uint64_t udbh, udbl;
29460Sstevel@tonic-gate 
29470Sstevel@tonic-gate 	get_asyncflt(&afsr);
29480Sstevel@tonic-gate 
29490Sstevel@tonic-gate 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
29500Sstevel@tonic-gate 		*acc_afsr |= afsr;
29510Sstevel@tonic-gate 
29520Sstevel@tonic-gate 	if (spf_flt != NULL) {
29530Sstevel@tonic-gate 		aflt->flt_stat = afsr;
29540Sstevel@tonic-gate 		get_asyncaddr(&aflt->flt_addr);
29550Sstevel@tonic-gate 		aflt->flt_addr &= SABRE_AFAR_PA;
29560Sstevel@tonic-gate 
29570Sstevel@tonic-gate 		get_udb_errors(&udbh, &udbl);
29580Sstevel@tonic-gate 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
29590Sstevel@tonic-gate 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
29600Sstevel@tonic-gate 	}
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate 	set_asyncflt(afsr);		/* clear afsr */
29630Sstevel@tonic-gate 	clr_datapath();			/* clear udbs */
29640Sstevel@tonic-gate 	return (afsr);
29650Sstevel@tonic-gate }
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate /*
29680Sstevel@tonic-gate  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
29690Sstevel@tonic-gate  * tag of the first bad line will be returned. We also return the old-afsr
29700Sstevel@tonic-gate  * (before clearing the sticky bits). The linecnt data will be updated to
29710Sstevel@tonic-gate  * indicate the number of bad lines detected.
29720Sstevel@tonic-gate  */
29730Sstevel@tonic-gate static void
29740Sstevel@tonic-gate scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
29750Sstevel@tonic-gate 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
29760Sstevel@tonic-gate {
29770Sstevel@tonic-gate 	ec_data_t t_ecdata[8];
29780Sstevel@tonic-gate 	uint64_t t_etag, oafsr;
29790Sstevel@tonic-gate 	uint64_t pa = AFLT_INV_ADDR;
29800Sstevel@tonic-gate 	uint32_t i, j, ecache_sz;
29810Sstevel@tonic-gate 	uint64_t acc_afsr = 0;
29820Sstevel@tonic-gate 	uint64_t *cpu_afsr = NULL;
29830Sstevel@tonic-gate 
29840Sstevel@tonic-gate 	if (CPU_PRIVATE(CPU) != NULL)
29850Sstevel@tonic-gate 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
29860Sstevel@tonic-gate 
29870Sstevel@tonic-gate 	*linecnt = 0;
29880Sstevel@tonic-gate 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
29890Sstevel@tonic-gate 
29900Sstevel@tonic-gate 	for (i = 0; i < ecache_sz; i += 64) {
29910Sstevel@tonic-gate 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
29920Sstevel@tonic-gate 		    cpu_afsr);
29930Sstevel@tonic-gate 		acc_afsr |= oafsr;
29940Sstevel@tonic-gate 
29950Sstevel@tonic-gate 		/*
29960Sstevel@tonic-gate 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
29970Sstevel@tonic-gate 		 * looking for the first occurrence of an EDP error.  The AFSR
29980Sstevel@tonic-gate 		 * info is captured for each 8-byte chunk.  Note that for
29990Sstevel@tonic-gate 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
30000Sstevel@tonic-gate 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
30010Sstevel@tonic-gate 		 * for the high and low 8-byte words within the 16-byte chunk).
30020Sstevel@tonic-gate 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
30030Sstevel@tonic-gate 		 * granularity and only PSYND bits [7:0] are used.
30040Sstevel@tonic-gate 		 */
30050Sstevel@tonic-gate 		for (j = 0; j < 8; j++) {
30060Sstevel@tonic-gate 			ec_data_t *ecdptr = &t_ecdata[j];
30070Sstevel@tonic-gate 
30080Sstevel@tonic-gate 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
30090Sstevel@tonic-gate 				uint64_t errpa;
30100Sstevel@tonic-gate 				ushort_t psynd;
30110Sstevel@tonic-gate 				uint32_t ec_set_size = ecache_sz /
30120Sstevel@tonic-gate 				    ecache_associativity;
30130Sstevel@tonic-gate 
30140Sstevel@tonic-gate 				/*
30150Sstevel@tonic-gate 				 * For Spitfire/Blackbird, we need to look at
30160Sstevel@tonic-gate 				 * the PSYND to make sure that this 8-byte chunk
30170Sstevel@tonic-gate 				 * is the right one.  PSYND bits [15:8] belong
30180Sstevel@tonic-gate 				 * to the upper 8-byte (even) chunk.  Bits
30190Sstevel@tonic-gate 				 * [7:0] belong to the lower 8-byte chunk (odd).
30200Sstevel@tonic-gate 				 */
30210Sstevel@tonic-gate 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
30220Sstevel@tonic-gate 				if (!isus2i && !isus2e) {
30230Sstevel@tonic-gate 					if (j & 0x1)
30240Sstevel@tonic-gate 						psynd = psynd & 0xFF;
30250Sstevel@tonic-gate 					else
30260Sstevel@tonic-gate 						psynd = psynd >> 8;
30270Sstevel@tonic-gate 
30280Sstevel@tonic-gate 					if (!psynd)
30290Sstevel@tonic-gate 						continue; /* wrong chunk */
30300Sstevel@tonic-gate 				}
30310Sstevel@tonic-gate 
30320Sstevel@tonic-gate 				/* Construct the PA */
30330Sstevel@tonic-gate 				errpa = ((t_etag & cpu_ec_tag_mask) <<
30340Sstevel@tonic-gate 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
30350Sstevel@tonic-gate 				    ec_set_size);
30360Sstevel@tonic-gate 
30370Sstevel@tonic-gate 				/* clean up the cache line */
30380Sstevel@tonic-gate 				flushecacheline(P2ALIGN(errpa, 64),
30390Sstevel@tonic-gate 					cpunodes[CPU->cpu_id].ecache_size);
30400Sstevel@tonic-gate 
30410Sstevel@tonic-gate 				oafsr = clear_errors(NULL, cpu_afsr);
30420Sstevel@tonic-gate 				acc_afsr |= oafsr;
30430Sstevel@tonic-gate 
30440Sstevel@tonic-gate 				(*linecnt)++;
30450Sstevel@tonic-gate 
30460Sstevel@tonic-gate 				/*
30470Sstevel@tonic-gate 				 * Capture the PA for the first bad line found.
30480Sstevel@tonic-gate 				 * Return the ecache dump and tag info.
30490Sstevel@tonic-gate 				 */
30500Sstevel@tonic-gate 				if (pa == AFLT_INV_ADDR) {
30510Sstevel@tonic-gate 					int k;
30520Sstevel@tonic-gate 
30530Sstevel@tonic-gate 					pa = errpa;
30540Sstevel@tonic-gate 					for (k = 0; k < 8; k++)
30550Sstevel@tonic-gate 						ecache_data[k] = t_ecdata[k];
30560Sstevel@tonic-gate 					*ecache_tag = t_etag;
30570Sstevel@tonic-gate 				}
30580Sstevel@tonic-gate 				break;
30590Sstevel@tonic-gate 			}
30600Sstevel@tonic-gate 		}
30610Sstevel@tonic-gate 	}
30620Sstevel@tonic-gate 	*t_afar = pa;
30630Sstevel@tonic-gate 	*t_afsr = acc_afsr;
30640Sstevel@tonic-gate }
30650Sstevel@tonic-gate 
30660Sstevel@tonic-gate static void
30670Sstevel@tonic-gate cpu_log_ecmem_info(spitf_async_flt *spf_flt)
30680Sstevel@tonic-gate {
30690Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)spf_flt;
30700Sstevel@tonic-gate 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
30710Sstevel@tonic-gate 	char linestr[30];
30720Sstevel@tonic-gate 	char *state_str;
30730Sstevel@tonic-gate 	int i;
30740Sstevel@tonic-gate 
30750Sstevel@tonic-gate 	/*
30760Sstevel@tonic-gate 	 * Check the ecache tag to make sure it
30770Sstevel@tonic-gate 	 * is valid. If invalid, a memory dump was
30780Sstevel@tonic-gate 	 * captured instead of a ecache dump.
30790Sstevel@tonic-gate 	 */
30800Sstevel@tonic-gate 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
30810Sstevel@tonic-gate 		uchar_t eparity = (uchar_t)
30820Sstevel@tonic-gate 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
30830Sstevel@tonic-gate 
30840Sstevel@tonic-gate 		uchar_t estate = (uchar_t)
30850Sstevel@tonic-gate 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
30860Sstevel@tonic-gate 
30870Sstevel@tonic-gate 		if (estate == cpu_ec_state_shr)
30880Sstevel@tonic-gate 			state_str = "Shared";
30890Sstevel@tonic-gate 		else if (estate == cpu_ec_state_exl)
30900Sstevel@tonic-gate 			state_str = "Exclusive";
30910Sstevel@tonic-gate 		else if (estate == cpu_ec_state_own)
30920Sstevel@tonic-gate 			state_str = "Owner";
30930Sstevel@tonic-gate 		else if (estate == cpu_ec_state_mod)
30940Sstevel@tonic-gate 			state_str = "Modified";
30950Sstevel@tonic-gate 		else
30960Sstevel@tonic-gate 			state_str = "Invalid";
30970Sstevel@tonic-gate 
30980Sstevel@tonic-gate 		if (spf_flt->flt_ec_lcnt > 1) {
30990Sstevel@tonic-gate 			(void) snprintf(linestr, sizeof (linestr),
31000Sstevel@tonic-gate 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
31010Sstevel@tonic-gate 		} else {
31020Sstevel@tonic-gate 			linestr[0] = '\0';
31030Sstevel@tonic-gate 		}
31040Sstevel@tonic-gate 
31050Sstevel@tonic-gate 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
31060Sstevel@tonic-gate 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
31070Sstevel@tonic-gate 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
31080Sstevel@tonic-gate 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
31090Sstevel@tonic-gate 		    (uint32_t)ecache_tag, state_str,
31100Sstevel@tonic-gate 		    (uint32_t)eparity, linestr);
31110Sstevel@tonic-gate 	} else {
31120Sstevel@tonic-gate 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
31130Sstevel@tonic-gate 		    " E$tag != PA from AFAR; E$line was victimized"
31140Sstevel@tonic-gate 		    "\n    dumping memory from PA 0x%08x.%08x instead",
31150Sstevel@tonic-gate 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
31160Sstevel@tonic-gate 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
31170Sstevel@tonic-gate 	}
31180Sstevel@tonic-gate 
31190Sstevel@tonic-gate 	/*
31200Sstevel@tonic-gate 	 * Dump out all 8 8-byte ecache data captured
31210Sstevel@tonic-gate 	 * For each 8-byte data captured, we check the
31220Sstevel@tonic-gate 	 * captured afsr's parity syndrome to find out
31230Sstevel@tonic-gate 	 * which 8-byte chunk is bad. For memory dump, the
31240Sstevel@tonic-gate 	 * AFSR values were initialized to 0.
31250Sstevel@tonic-gate 	 */
31260Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
31270Sstevel@tonic-gate 		ec_data_t *ecdptr;
31280Sstevel@tonic-gate 		uint_t offset;
31290Sstevel@tonic-gate 		ushort_t psynd;
31300Sstevel@tonic-gate 		ushort_t bad;
31310Sstevel@tonic-gate 		uint64_t edp;
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate 		offset = i << 3;	/* multiply by 8 */
31340Sstevel@tonic-gate 		ecdptr = &spf_flt->flt_ec_data[i];
31350Sstevel@tonic-gate 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
31360Sstevel@tonic-gate 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
31370Sstevel@tonic-gate 
31380Sstevel@tonic-gate 		/*
31390Sstevel@tonic-gate 		 * For Sabre/Hummingbird, parity synd is captured only
31400Sstevel@tonic-gate 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
31410Sstevel@tonic-gate 		 * For spitfire/blackbird, AFSR.PSYND is captured
31420Sstevel@tonic-gate 		 * in 16-byte granularity. [15:8] represent
31430Sstevel@tonic-gate 		 * the upper 8 byte and [7:0] the lower 8 byte.
31440Sstevel@tonic-gate 		 */
31450Sstevel@tonic-gate 		if (isus2i || isus2e || (i & 0x1))
31460Sstevel@tonic-gate 			bad = (psynd & 0xFF);		/* check bits [7:0] */
31470Sstevel@tonic-gate 		else
31480Sstevel@tonic-gate 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
31490Sstevel@tonic-gate 
31500Sstevel@tonic-gate 		if (bad && edp) {
31510Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
31520Sstevel@tonic-gate 			    " E$Data (0x%02x): 0x%08x.%08x "
31530Sstevel@tonic-gate 			    "*Bad* PSYND=0x%04x", offset,
31540Sstevel@tonic-gate 			    (uint32_t)(ecdptr->ec_d8 >> 32),
31550Sstevel@tonic-gate 			    (uint32_t)ecdptr->ec_d8, psynd);
31560Sstevel@tonic-gate 		} else {
31570Sstevel@tonic-gate 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
31580Sstevel@tonic-gate 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
31590Sstevel@tonic-gate 			    (uint32_t)(ecdptr->ec_d8 >> 32),
31600Sstevel@tonic-gate 			    (uint32_t)ecdptr->ec_d8);
31610Sstevel@tonic-gate 		}
31620Sstevel@tonic-gate 	}
31630Sstevel@tonic-gate }
31640Sstevel@tonic-gate 
31650Sstevel@tonic-gate /*
31660Sstevel@tonic-gate  * Common logging function for all cpu async errors.  This function allows the
31670Sstevel@tonic-gate  * caller to generate a single cmn_err() call that logs the appropriate items
31680Sstevel@tonic-gate  * from the fault structure, and implements our rules for AFT logging levels.
31690Sstevel@tonic-gate  *
31700Sstevel@tonic-gate  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
31710Sstevel@tonic-gate  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
31720Sstevel@tonic-gate  *	spflt: pointer to spitfire async fault structure
31730Sstevel@tonic-gate  *	logflags: bitflags indicating what to output
 *	endstr: an end string to appear at the end of this log
31750Sstevel@tonic-gate  *	fmt: a format string to appear at the beginning of the log
31760Sstevel@tonic-gate  *
31770Sstevel@tonic-gate  * The logflags allows the construction of predetermined output from the spflt
31780Sstevel@tonic-gate  * structure.  The individual data items always appear in a consistent order.
31790Sstevel@tonic-gate  * Note that either or both of the spflt structure pointer and logflags may be
31800Sstevel@tonic-gate  * NULL or zero respectively, indicating that the predetermined output
31810Sstevel@tonic-gate  * substrings are not requested in this log.  The output looks like this:
31820Sstevel@tonic-gate  *
31830Sstevel@tonic-gate  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
31840Sstevel@tonic-gate  *	<CPU_SPACE><CPU_ERRID>
31850Sstevel@tonic-gate  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
31860Sstevel@tonic-gate  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
31870Sstevel@tonic-gate  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
31880Sstevel@tonic-gate  *	newline+4spaces<CPU_SYND>
31890Sstevel@tonic-gate  *	newline+4spaces<endstr>
31900Sstevel@tonic-gate  *
31910Sstevel@tonic-gate  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
31920Sstevel@tonic-gate  * it is assumed that <endstr> will be the unum string in this case.  The size
31930Sstevel@tonic-gate  * of our intermediate formatting buf[] is based on the worst case of all flags
31940Sstevel@tonic-gate  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
31950Sstevel@tonic-gate  * formatting so we don't need additional stack space to format them here.
31960Sstevel@tonic-gate  */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether this message reaches the console or only the
	 * system log.  Faults with no fault info, CPU level-1 faults, and
	 * panics are gated by the AFT tag number (or aft_verbose); all
	 * other faults are gated by the ce_verbose_memory/ce_verbose_other
	 * tunables and may be dropped entirely when the tunable is 0.
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/*
	 * A leading '!' tells cmn_err(9F) to send the message to the
	 * system log only, suppressing console output.
	 */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	/*
	 * Append the predetermined substrings selected by logflags, always
	 * in the fixed order documented in the block comment above.
	 */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			/* Nothing is appended if neither trap bit is set. */
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		/*
		 * %b is the Solaris kernel bit-decode format: the value is
		 * printed with the named bits from the AFSR_FMTSTR*/
		 /* strings appended (see cmn_err(9F)).
		 */
		if (logflags & CPU_AFSR) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	/*
	 * With CPU_SYND the endstr (assumed to be the unum) continues the
	 * "Memory Module " line above; otherwise it starts a new line.
	 */
	if (endstr != NULL) {
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n"); /* add final \n if needed */

	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}
33550Sstevel@tonic-gate 
33560Sstevel@tonic-gate /*
33570Sstevel@tonic-gate  * Ecache Scrubbing
33580Sstevel@tonic-gate  *
33590Sstevel@tonic-gate  * The basic idea is to prevent lines from sitting in the ecache long enough
33600Sstevel@tonic-gate  * to build up soft errors which can lead to ecache parity errors.
33610Sstevel@tonic-gate  *
33620Sstevel@tonic-gate  * The following rules are observed when flushing the ecache:
33630Sstevel@tonic-gate  *
33640Sstevel@tonic-gate  * 1. When the system is busy, flush bad clean lines
33650Sstevel@tonic-gate  * 2. When the system is idle, flush all clean lines
33660Sstevel@tonic-gate  * 3. When the system is idle, flush good dirty lines
33670Sstevel@tonic-gate  * 4. Never flush bad dirty lines.
33680Sstevel@tonic-gate  *
33690Sstevel@tonic-gate  *	modify	parity	busy   idle
33700Sstevel@tonic-gate  *	----------------------------
33710Sstevel@tonic-gate  *	clean	good		X
33720Sstevel@tonic-gate  * 	clean	bad	X	X
33730Sstevel@tonic-gate  * 	dirty	good		X
33740Sstevel@tonic-gate  *	dirty	bad
33750Sstevel@tonic-gate  *
33760Sstevel@tonic-gate  * Bad or good refers to whether a line has an E$ parity error or not.
33770Sstevel@tonic-gate  * Clean or dirty refers to the state of the modified bit.  We currently
33780Sstevel@tonic-gate  * default the scan rate to 100 (scan 10% of the cache per second).
33790Sstevel@tonic-gate  *
33800Sstevel@tonic-gate  * The following are E$ states and actions.
33810Sstevel@tonic-gate  *
33820Sstevel@tonic-gate  * We encode our state as a 3-bit number, consisting of:
33830Sstevel@tonic-gate  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
33840Sstevel@tonic-gate  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
33850Sstevel@tonic-gate  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
33860Sstevel@tonic-gate  *
33870Sstevel@tonic-gate  * We associate a flushing and a logging action with each state.
33880Sstevel@tonic-gate  *
33890Sstevel@tonic-gate  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
33900Sstevel@tonic-gate  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
33910Sstevel@tonic-gate  * E$ only, in addition to value being set by ec_flush.
33920Sstevel@tonic-gate  */
33930Sstevel@tonic-gate 
#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */

/*
 * Scrub policy table, indexed by the 3-bit Modified/Parity/Busy (mpb)
 * state described in the block comment above: whether to flush the line,
 * whether to log it, and what error type to log it as.
 */
struct {
	char	ec_flush;		/* whether to flush or not */
	char	ec_log;			/* ecache logging */
	char	ec_log_type;		/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};

/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * We are flushing lines which are Clean_Good_Busy and also the lines
 * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))

/* Bit positions within the 3-bit mpb index used with ec_action[]. */
#define	ECACHE_STATE_MODIFIED	0x4
#define	ECACHE_STATE_PARITY	0x2
#define	ECACHE_STATE_BUSY	0x1
34300Sstevel@tonic-gate 
/*
 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
 */
int ecache_calls_a_sec_mirrored = 1;	/* scrubber calls/sec, mirrored E$ */
int ecache_lines_per_call_mirrored = 1;	/* lines per call, mirrored E$ */

int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;		/* increase the scan rate when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 */
static uint_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;
34530Sstevel@tonic-gate 
/*
 * Kstats for the E$ scrubber.
 *
 * NOTE: the first eight fields must remain in this exact order -- the
 * scrubber indexes them as an array of kstat_named_t using the 3-bit
 * M/P/B state (see the "ec_knp = (kstat_named_t *)ec_ksp + mpb" use in
 * scrub_ecache_line()).
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;

/* Initial values for a newly created E$ scrubber kstat. */
static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};

/*
 * kmem cache for CPU private data -- allocation/use sites are not visible
 * in this part of the file; presumably backs CPU_PRIVATE_PTR storage
 * (NOTE(review): confirm against the rest of the file).
 */
struct kmem_cache *sf_private_cache;
34880Sstevel@tonic-gate 
34890Sstevel@tonic-gate /*
34900Sstevel@tonic-gate  * Called periodically on each CPU to scan the ecache once a sec.
34910Sstevel@tonic-gate  * adjusting the ecache line index appropriately
34920Sstevel@tonic-gate  */
34930Sstevel@tonic-gate void
34940Sstevel@tonic-gate scrub_ecache_line()
34950Sstevel@tonic-gate {
34960Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
34970Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
34980Sstevel@tonic-gate 	uint32_t index = ssmp->ecache_flush_index;
34990Sstevel@tonic-gate 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
35000Sstevel@tonic-gate 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
35010Sstevel@tonic-gate 	int nlines = ssmp->ecache_nlines;
35020Sstevel@tonic-gate 	uint32_t ec_set_size = ec_size / ecache_associativity;
35030Sstevel@tonic-gate 	int ec_mirror = ssmp->ecache_mirror;
35040Sstevel@tonic-gate 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
35050Sstevel@tonic-gate 
35060Sstevel@tonic-gate 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
35070Sstevel@tonic-gate 	int mpb;		/* encode Modified, Parity, Busy for action */
35080Sstevel@tonic-gate 	uchar_t state;
35090Sstevel@tonic-gate 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
35100Sstevel@tonic-gate 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
35110Sstevel@tonic-gate 	ec_data_t ec_data[8];
35120Sstevel@tonic-gate 	kstat_named_t *ec_knp;
35130Sstevel@tonic-gate 
35140Sstevel@tonic-gate 	switch (ec_mirror) {
35150Sstevel@tonic-gate 		default:
35160Sstevel@tonic-gate 		case ECACHE_CPU_NON_MIRROR:
35170Sstevel@tonic-gate 			/*
35180Sstevel@tonic-gate 			 * The E$ scan rate is expressed in units of tenths of
35190Sstevel@tonic-gate 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
35200Sstevel@tonic-gate 			 * whole cache is scanned every second.
35210Sstevel@tonic-gate 			 */
35220Sstevel@tonic-gate 			scan_lines = (nlines * ecache_scan_rate) /
35230Sstevel@tonic-gate 					(1000 * ecache_calls_a_sec);
35240Sstevel@tonic-gate 			if (!(ssmp->ecache_busy)) {
35250Sstevel@tonic-gate 				if (ecache_idle_factor > 0) {
35260Sstevel@tonic-gate 					scan_lines *= ecache_idle_factor;
35270Sstevel@tonic-gate 				}
35280Sstevel@tonic-gate 			} else {
35290Sstevel@tonic-gate 				flush_clean_busy = (scan_lines *
35300Sstevel@tonic-gate 					ecache_flush_clean_good_busy) / 100;
35310Sstevel@tonic-gate 				flush_dirty_busy = (scan_lines *
35320Sstevel@tonic-gate 					ecache_flush_dirty_good_busy) / 100;
35330Sstevel@tonic-gate 			}
35340Sstevel@tonic-gate 
35350Sstevel@tonic-gate 			ec_timeout_calls = (ecache_calls_a_sec ?
35360Sstevel@tonic-gate 						ecache_calls_a_sec : 1);
35370Sstevel@tonic-gate 			break;
35380Sstevel@tonic-gate 
35390Sstevel@tonic-gate 		case ECACHE_CPU_MIRROR:
35400Sstevel@tonic-gate 			scan_lines = ecache_lines_per_call_mirrored;
35410Sstevel@tonic-gate 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
35420Sstevel@tonic-gate 					ecache_calls_a_sec_mirrored : 1);
35430Sstevel@tonic-gate 			break;
35440Sstevel@tonic-gate 	}
35450Sstevel@tonic-gate 
35460Sstevel@tonic-gate 	/*
35470Sstevel@tonic-gate 	 * The ecache scrubber algorithm operates by reading and
35480Sstevel@tonic-gate 	 * decoding the E$ tag to determine whether the corresponding E$ line
35490Sstevel@tonic-gate 	 * can be scrubbed. There is a implicit assumption in the scrubber
35500Sstevel@tonic-gate 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
35510Sstevel@tonic-gate 	 * flawed since the E$ tag may also be corrupted and have parity errors
35520Sstevel@tonic-gate 	 * The scrubber logic is enhanced to check the validity of the E$ tag
35530Sstevel@tonic-gate 	 * before scrubbing. When a parity error is detected in the E$ tag,
35540Sstevel@tonic-gate 	 * it is possible to recover and scrub the tag under certain conditions
35550Sstevel@tonic-gate 	 * so that a ETP error condition can be avoided.
35560Sstevel@tonic-gate 	 */
35570Sstevel@tonic-gate 
35580Sstevel@tonic-gate 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
35590Sstevel@tonic-gate 		/*
35600Sstevel@tonic-gate 		 * We get the old-AFSR before clearing the AFSR sticky bits
35610Sstevel@tonic-gate 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
35620Sstevel@tonic-gate 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
35630Sstevel@tonic-gate 		 */
35640Sstevel@tonic-gate 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
35650Sstevel@tonic-gate 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
35660Sstevel@tonic-gate 				cpu_ec_state_shift);
35670Sstevel@tonic-gate 
35680Sstevel@tonic-gate 		/*
35690Sstevel@tonic-gate 		 * ETP is set try to scrub the ecache tag.
35700Sstevel@tonic-gate 		 */
35710Sstevel@tonic-gate 		if (nafsr & P_AFSR_ETP) {
35720Sstevel@tonic-gate 			ecache_scrub_tag_err(nafsr, state, index);
35730Sstevel@tonic-gate 		} else if (state & cpu_ec_state_valid) {
35740Sstevel@tonic-gate 			/*
35750Sstevel@tonic-gate 			 * ETP is not set, E$ tag is valid.
35760Sstevel@tonic-gate 			 * Proceed with the E$ scrubbing.
35770Sstevel@tonic-gate 			 */
35780Sstevel@tonic-gate 			if (state & cpu_ec_state_dirty)
35790Sstevel@tonic-gate 				mpb |= ECACHE_STATE_MODIFIED;
35800Sstevel@tonic-gate 
35810Sstevel@tonic-gate 			tafsr = check_ecache_line(index, acc_afsr);
35820Sstevel@tonic-gate 
35830Sstevel@tonic-gate 			if (tafsr & P_AFSR_EDP) {
35840Sstevel@tonic-gate 				mpb |= ECACHE_STATE_PARITY;
35850Sstevel@tonic-gate 
35860Sstevel@tonic-gate 				if (ecache_scrub_verbose ||
35870Sstevel@tonic-gate 							ecache_scrub_panic) {
35880Sstevel@tonic-gate 					get_ecache_dtag(P2ALIGN(index, 64),
35890Sstevel@tonic-gate 						(uint64_t *)&ec_data[0],
35900Sstevel@tonic-gate 						&ec_tag, &oafsr, acc_afsr);
35910Sstevel@tonic-gate 				}
35920Sstevel@tonic-gate 			}
35930Sstevel@tonic-gate 
35940Sstevel@tonic-gate 			if (ssmp->ecache_busy)
35950Sstevel@tonic-gate 				mpb |= ECACHE_STATE_BUSY;
35960Sstevel@tonic-gate 
35970Sstevel@tonic-gate 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
35980Sstevel@tonic-gate 			ec_knp->value.ul++;
35990Sstevel@tonic-gate 
36000Sstevel@tonic-gate 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
36010Sstevel@tonic-gate 				cpu_ec_tag_shift) | (index % ec_set_size);
36020Sstevel@tonic-gate 
36030Sstevel@tonic-gate 			/*
36040Sstevel@tonic-gate 			 * We flush the E$ lines depending on the ec_flush,
36050Sstevel@tonic-gate 			 * we additionally flush clean_good_busy and
36060Sstevel@tonic-gate 			 * dirty_good_busy lines for mirrored E$.
36070Sstevel@tonic-gate 			 */
36080Sstevel@tonic-gate 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
36090Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
36100Sstevel@tonic-gate 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
36110Sstevel@tonic-gate 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
36120Sstevel@tonic-gate 					flushecacheline(paddr, ec_size);
36130Sstevel@tonic-gate 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
36140Sstevel@tonic-gate 				softcall(ecache_page_retire, (void *)paddr);
36150Sstevel@tonic-gate 			}
36160Sstevel@tonic-gate 
36170Sstevel@tonic-gate 			/*
36180Sstevel@tonic-gate 			 * Conditionally flush both the clean_good and
36190Sstevel@tonic-gate 			 * dirty_good lines when busy.
36200Sstevel@tonic-gate 			 */
36210Sstevel@tonic-gate 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
36220Sstevel@tonic-gate 				flush_clean_busy--;
36230Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
36240Sstevel@tonic-gate 				ec_ksp->clean_good_busy_flush.value.ul++;
36250Sstevel@tonic-gate 			} else if (DGB(mpb, ec_mirror) &&
36260Sstevel@tonic-gate 						(flush_dirty_busy > 0)) {
36270Sstevel@tonic-gate 				flush_dirty_busy--;
36280Sstevel@tonic-gate 				flushecacheline(paddr, ec_size);
36290Sstevel@tonic-gate 				ec_ksp->dirty_good_busy_flush.value.ul++;
36300Sstevel@tonic-gate 			}
36310Sstevel@tonic-gate 
36320Sstevel@tonic-gate 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
36330Sstevel@tonic-gate 						ecache_scrub_panic)) {
36340Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
36350Sstevel@tonic-gate 						tafsr);
36360Sstevel@tonic-gate 			}
36370Sstevel@tonic-gate 
36380Sstevel@tonic-gate 		} else {
36390Sstevel@tonic-gate 			ec_ksp->invalid_lines.value.ul++;
36400Sstevel@tonic-gate 		}
36410Sstevel@tonic-gate 
36420Sstevel@tonic-gate 		if ((index += ec_linesize) >= ec_size)
36430Sstevel@tonic-gate 			index = 0;
36440Sstevel@tonic-gate 
36450Sstevel@tonic-gate 	}
36460Sstevel@tonic-gate 
36470Sstevel@tonic-gate 	/*
36480Sstevel@tonic-gate 	 * set the ecache scrub index for the next time around
36490Sstevel@tonic-gate 	 */
36500Sstevel@tonic-gate 	ssmp->ecache_flush_index = index;
36510Sstevel@tonic-gate 
36520Sstevel@tonic-gate 	if (*acc_afsr & P_AFSR_CP) {
36530Sstevel@tonic-gate 		uint64_t ret_afsr;
36540Sstevel@tonic-gate 
36550Sstevel@tonic-gate 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
36560Sstevel@tonic-gate 		if ((ret_afsr & P_AFSR_CP) == 0)
36570Sstevel@tonic-gate 			*acc_afsr = 0;
36580Sstevel@tonic-gate 	}
36590Sstevel@tonic-gate }
36600Sstevel@tonic-gate 
36610Sstevel@tonic-gate /*
36620Sstevel@tonic-gate  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
36630Sstevel@tonic-gate  * we decrement the outstanding request count to zero.
36640Sstevel@tonic-gate  */
36650Sstevel@tonic-gate 
36660Sstevel@tonic-gate /*ARGSUSED*/
36670Sstevel@tonic-gate uint_t
36680Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
36690Sstevel@tonic-gate {
36700Sstevel@tonic-gate 	int i;
36710Sstevel@tonic-gate 	int outstanding;
36720Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
36730Sstevel@tonic-gate 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
36740Sstevel@tonic-gate 
36750Sstevel@tonic-gate 	do {
36760Sstevel@tonic-gate 		outstanding = *countp;
36770Sstevel@tonic-gate 		ASSERT(outstanding > 0);
36780Sstevel@tonic-gate 		for (i = 0; i < outstanding; i++)
36790Sstevel@tonic-gate 			scrub_ecache_line();
36800Sstevel@tonic-gate 	} while (atomic_add_32_nv(countp, -outstanding));
36810Sstevel@tonic-gate 
36820Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
36830Sstevel@tonic-gate }
36840Sstevel@tonic-gate 
36850Sstevel@tonic-gate /*
36860Sstevel@tonic-gate  * force each cpu to perform an ecache scrub, called from a timeout
36870Sstevel@tonic-gate  */
36880Sstevel@tonic-gate extern xcfunc_t ecache_scrubreq_tl1;
36890Sstevel@tonic-gate 
36900Sstevel@tonic-gate void
36910Sstevel@tonic-gate do_scrub_ecache_line(void)
36920Sstevel@tonic-gate {
36930Sstevel@tonic-gate 	long delta;
36940Sstevel@tonic-gate 
36950Sstevel@tonic-gate 	if (ecache_calls_a_sec > hz)
36960Sstevel@tonic-gate 		ecache_calls_a_sec = hz;
36970Sstevel@tonic-gate 	else if (ecache_calls_a_sec <= 0)
36980Sstevel@tonic-gate 	    ecache_calls_a_sec = 1;
36990Sstevel@tonic-gate 
37000Sstevel@tonic-gate 	if (ecache_calls_a_sec_mirrored > hz)
37010Sstevel@tonic-gate 		ecache_calls_a_sec_mirrored = hz;
37020Sstevel@tonic-gate 	else if (ecache_calls_a_sec_mirrored <= 0)
37030Sstevel@tonic-gate 	    ecache_calls_a_sec_mirrored = 1;
37040Sstevel@tonic-gate 
37050Sstevel@tonic-gate 	if (ecache_scrub_enable) {
37060Sstevel@tonic-gate 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
37070Sstevel@tonic-gate 		delta = hz / ec_timeout_calls;
37080Sstevel@tonic-gate 	} else {
37090Sstevel@tonic-gate 		delta = hz;
37100Sstevel@tonic-gate 	}
37110Sstevel@tonic-gate 
37120Sstevel@tonic-gate 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
37130Sstevel@tonic-gate 		delta);
37140Sstevel@tonic-gate }
37150Sstevel@tonic-gate 
37160Sstevel@tonic-gate /*
37170Sstevel@tonic-gate  * initialization for ecache scrubbing
37180Sstevel@tonic-gate  * This routine is called AFTER all cpus have had cpu_init_private called
37190Sstevel@tonic-gate  * to initialize their private data areas.
37200Sstevel@tonic-gate  */
37210Sstevel@tonic-gate void
37220Sstevel@tonic-gate cpu_init_cache_scrub(void)
37230Sstevel@tonic-gate {
37240Sstevel@tonic-gate 	if (ecache_calls_a_sec > hz) {
37250Sstevel@tonic-gate 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
37260Sstevel@tonic-gate 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
37270Sstevel@tonic-gate 		ecache_calls_a_sec = hz;
37280Sstevel@tonic-gate 	}
37290Sstevel@tonic-gate 
37300Sstevel@tonic-gate 	/*
37310Sstevel@tonic-gate 	 * Register softint for ecache scrubbing.
37320Sstevel@tonic-gate 	 */
37330Sstevel@tonic-gate 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
37340Sstevel@tonic-gate 	    scrub_ecache_line_intr, NULL);
37350Sstevel@tonic-gate 
37360Sstevel@tonic-gate 	/*
37370Sstevel@tonic-gate 	 * kick off the scrubbing using realtime timeout
37380Sstevel@tonic-gate 	 */
37390Sstevel@tonic-gate 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
37400Sstevel@tonic-gate 	    hz / ecache_calls_a_sec);
37410Sstevel@tonic-gate }
37420Sstevel@tonic-gate 
37430Sstevel@tonic-gate /*
37440Sstevel@tonic-gate  * Unset the busy flag for this cpu.
37450Sstevel@tonic-gate  */
37460Sstevel@tonic-gate void
37470Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp)
37480Sstevel@tonic-gate {
37490Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
37500Sstevel@tonic-gate 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
37510Sstevel@tonic-gate 							sfpr_scrub_misc);
37520Sstevel@tonic-gate 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
37530Sstevel@tonic-gate 	}
37540Sstevel@tonic-gate }
37550Sstevel@tonic-gate 
37560Sstevel@tonic-gate /*
37570Sstevel@tonic-gate  * Set the busy flag for this cpu.
37580Sstevel@tonic-gate  */
37590Sstevel@tonic-gate void
37600Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp)
37610Sstevel@tonic-gate {
37620Sstevel@tonic-gate 	if (CPU_PRIVATE(cp) != NULL) {
37630Sstevel@tonic-gate 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
37640Sstevel@tonic-gate 							sfpr_scrub_misc);
37650Sstevel@tonic-gate 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
37660Sstevel@tonic-gate 	}
37670Sstevel@tonic-gate }
37680Sstevel@tonic-gate 
37690Sstevel@tonic-gate /*
37700Sstevel@tonic-gate  * initialize the ecache scrubber data structures
37710Sstevel@tonic-gate  * The global entry point cpu_init_private replaces this entry point.
37720Sstevel@tonic-gate  *
37730Sstevel@tonic-gate  */
37740Sstevel@tonic-gate static void
37750Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp)
37760Sstevel@tonic-gate {
37770Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
37780Sstevel@tonic-gate 	int cpuid = cp->cpu_id;
37790Sstevel@tonic-gate 
37800Sstevel@tonic-gate 	/*
37810Sstevel@tonic-gate 	 * intialize bookkeeping for cache scrubbing
37820Sstevel@tonic-gate 	 */
37830Sstevel@tonic-gate 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
37840Sstevel@tonic-gate 
37850Sstevel@tonic-gate 	ssmp->ecache_flush_index = 0;
37860Sstevel@tonic-gate 
37870Sstevel@tonic-gate 	ssmp->ecache_nlines =
37880Sstevel@tonic-gate 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
37890Sstevel@tonic-gate 
37900Sstevel@tonic-gate 	/*
37910Sstevel@tonic-gate 	 * Determine whether we are running on mirrored SRAM
37920Sstevel@tonic-gate 	 */
37930Sstevel@tonic-gate 
37940Sstevel@tonic-gate 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
37950Sstevel@tonic-gate 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
37960Sstevel@tonic-gate 	else
37970Sstevel@tonic-gate 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
37980Sstevel@tonic-gate 
37990Sstevel@tonic-gate 	cpu_busy_ecache_scrub(cp);
38000Sstevel@tonic-gate 
38010Sstevel@tonic-gate 	/*
38020Sstevel@tonic-gate 	 * initialize the kstats
38030Sstevel@tonic-gate 	 */
38040Sstevel@tonic-gate 	ecache_kstat_init(cp);
38050Sstevel@tonic-gate }
38060Sstevel@tonic-gate 
38070Sstevel@tonic-gate /*
38080Sstevel@tonic-gate  * uninitialize the ecache scrubber data structures
38090Sstevel@tonic-gate  * The global entry point cpu_uninit_private replaces this entry point.
38100Sstevel@tonic-gate  */
38110Sstevel@tonic-gate static void
38120Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp)
38130Sstevel@tonic-gate {
38140Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
38150Sstevel@tonic-gate 
38160Sstevel@tonic-gate 	if (ssmp->ecache_ksp != NULL) {
38170Sstevel@tonic-gate 		kstat_delete(ssmp->ecache_ksp);
38180Sstevel@tonic-gate 		ssmp->ecache_ksp = NULL;
38190Sstevel@tonic-gate 	}
38200Sstevel@tonic-gate 
38210Sstevel@tonic-gate 	/*
38220Sstevel@tonic-gate 	 * un-initialize bookkeeping for cache scrubbing
38230Sstevel@tonic-gate 	 */
38240Sstevel@tonic-gate 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
38250Sstevel@tonic-gate 
38260Sstevel@tonic-gate 	cpu_idle_ecache_scrub(cp);
38270Sstevel@tonic-gate }
38280Sstevel@tonic-gate 
38290Sstevel@tonic-gate struct kmem_cache *sf_private_cache;
38300Sstevel@tonic-gate 
38310Sstevel@tonic-gate /*
38320Sstevel@tonic-gate  * Cpu private initialization.  This includes allocating the cpu_private
38330Sstevel@tonic-gate  * data structure, initializing it, and initializing the scrubber for this
38340Sstevel@tonic-gate  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
38350Sstevel@tonic-gate  * calls cpu_init_ecache_scrub_dr to init the scrubber.
38360Sstevel@tonic-gate  * We use kmem_cache_create for the spitfire private data structure because it
38370Sstevel@tonic-gate  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
38380Sstevel@tonic-gate  */
38390Sstevel@tonic-gate void
38400Sstevel@tonic-gate cpu_init_private(struct cpu *cp)
38410Sstevel@tonic-gate {
38420Sstevel@tonic-gate 	spitfire_private_t *sfprp;
38430Sstevel@tonic-gate 
38440Sstevel@tonic-gate 	ASSERT(CPU_PRIVATE(cp) == NULL);
38450Sstevel@tonic-gate 
38460Sstevel@tonic-gate 	/*
38470Sstevel@tonic-gate 	 * If the sf_private_cache has not been created, create it.
38480Sstevel@tonic-gate 	 */
38490Sstevel@tonic-gate 	if (sf_private_cache == NULL) {
38500Sstevel@tonic-gate 		sf_private_cache = kmem_cache_create("sf_private_cache",
38510Sstevel@tonic-gate 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
38520Sstevel@tonic-gate 			NULL, NULL, NULL, NULL, 0);
38530Sstevel@tonic-gate 		ASSERT(sf_private_cache);
38540Sstevel@tonic-gate 	}
38550Sstevel@tonic-gate 
38560Sstevel@tonic-gate 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
38570Sstevel@tonic-gate 
38580Sstevel@tonic-gate 	bzero(sfprp, sizeof (spitfire_private_t));
38590Sstevel@tonic-gate 
38600Sstevel@tonic-gate 	cpu_init_ecache_scrub_dr(cp);
38610Sstevel@tonic-gate }
38620Sstevel@tonic-gate 
38630Sstevel@tonic-gate /*
38640Sstevel@tonic-gate  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
38650Sstevel@tonic-gate  * deallocate the scrubber data structures and cpu_private data structure.
38660Sstevel@tonic-gate  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
38670Sstevel@tonic-gate  * the scrubber for the specified cpu.
38680Sstevel@tonic-gate  */
38690Sstevel@tonic-gate void
38700Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp)
38710Sstevel@tonic-gate {
38720Sstevel@tonic-gate 	ASSERT(CPU_PRIVATE(cp));
38730Sstevel@tonic-gate 
38740Sstevel@tonic-gate 	cpu_uninit_ecache_scrub_dr(cp);
38750Sstevel@tonic-gate 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
38760Sstevel@tonic-gate 	CPU_PRIVATE(cp) = NULL;
38770Sstevel@tonic-gate }
38780Sstevel@tonic-gate 
38790Sstevel@tonic-gate /*
38800Sstevel@tonic-gate  * initialize the ecache kstats for each cpu
38810Sstevel@tonic-gate  */
38820Sstevel@tonic-gate static void
38830Sstevel@tonic-gate ecache_kstat_init(struct cpu *cp)
38840Sstevel@tonic-gate {
38850Sstevel@tonic-gate 	struct kstat *ksp;
38860Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
38870Sstevel@tonic-gate 
38880Sstevel@tonic-gate 	ASSERT(ssmp != NULL);
38890Sstevel@tonic-gate 
38900Sstevel@tonic-gate 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
38910Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
38920Sstevel@tonic-gate 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
38930Sstevel@tonic-gate 	    KSTAT_FLAG_WRITABLE)) == NULL) {
38940Sstevel@tonic-gate 		ssmp->ecache_ksp = NULL;
38950Sstevel@tonic-gate 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
38960Sstevel@tonic-gate 		return;
38970Sstevel@tonic-gate 	}
38980Sstevel@tonic-gate 
38990Sstevel@tonic-gate 	ssmp->ecache_ksp = ksp;
39000Sstevel@tonic-gate 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
39010Sstevel@tonic-gate 	kstat_install(ksp);
39020Sstevel@tonic-gate }
39030Sstevel@tonic-gate 
39040Sstevel@tonic-gate /*
39050Sstevel@tonic-gate  * log the bad ecache information
39060Sstevel@tonic-gate  */
39070Sstevel@tonic-gate static void
39080Sstevel@tonic-gate ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
39090Sstevel@tonic-gate 		uint64_t afsr)
39100Sstevel@tonic-gate {
39110Sstevel@tonic-gate 	spitf_async_flt spf_flt;
39120Sstevel@tonic-gate 	struct async_flt *aflt;
39130Sstevel@tonic-gate 	int i;
39140Sstevel@tonic-gate 	char *class;
39150Sstevel@tonic-gate 
39160Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
39170Sstevel@tonic-gate 	aflt = &spf_flt.cmn_asyncflt;
39180Sstevel@tonic-gate 
39190Sstevel@tonic-gate 	for (i = 0; i < 8; i++) {
39200Sstevel@tonic-gate 		spf_flt.flt_ec_data[i] = ec_data[i];
39210Sstevel@tonic-gate 	}
39220Sstevel@tonic-gate 
39230Sstevel@tonic-gate 	spf_flt.flt_ec_tag = ec_tag;
39240Sstevel@tonic-gate 
39250Sstevel@tonic-gate 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
39260Sstevel@tonic-gate 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
39270Sstevel@tonic-gate 	} else spf_flt.flt_type = (ushort_t)mpb;
39280Sstevel@tonic-gate 
39290Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
39300Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
39310Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
39320Sstevel@tonic-gate 	aflt->flt_addr = paddr;
39330Sstevel@tonic-gate 	aflt->flt_stat = afsr;
39340Sstevel@tonic-gate 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
39350Sstevel@tonic-gate 
39360Sstevel@tonic-gate 	switch (mpb) {
39370Sstevel@tonic-gate 	case CPU_ECACHE_TAG_ERR:
39380Sstevel@tonic-gate 	case CPU_ECACHE_ADDR_PAR_ERR:
39390Sstevel@tonic-gate 	case CPU_ECACHE_ETP_ETS_ERR:
39400Sstevel@tonic-gate 	case CPU_ECACHE_STATE_ERR:
39410Sstevel@tonic-gate 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
39420Sstevel@tonic-gate 		break;
39430Sstevel@tonic-gate 	default:
39440Sstevel@tonic-gate 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
39450Sstevel@tonic-gate 		break;
39460Sstevel@tonic-gate 	}
39470Sstevel@tonic-gate 
39480Sstevel@tonic-gate 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
39490Sstevel@tonic-gate 	    ue_queue, aflt->flt_panic);
39500Sstevel@tonic-gate 
39510Sstevel@tonic-gate 	if (aflt->flt_panic)
39520Sstevel@tonic-gate 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
39530Sstevel@tonic-gate 					"line detected");
39540Sstevel@tonic-gate }
39550Sstevel@tonic-gate 
39560Sstevel@tonic-gate /*
39570Sstevel@tonic-gate  * Process an ecache error that occured during the E$ scrubbing.
39580Sstevel@tonic-gate  * We do the ecache scan to find the bad line, flush the bad line
39590Sstevel@tonic-gate  * and start the memscrubber to find any UE (in memory or in another cache)
39600Sstevel@tonic-gate  */
39610Sstevel@tonic-gate static uint64_t
39620Sstevel@tonic-gate ecache_scrub_misc_err(int type, uint64_t afsr)
39630Sstevel@tonic-gate {
39640Sstevel@tonic-gate 	spitf_async_flt spf_flt;
39650Sstevel@tonic-gate 	struct async_flt *aflt;
39660Sstevel@tonic-gate 	uint64_t oafsr;
39670Sstevel@tonic-gate 
39680Sstevel@tonic-gate 	bzero(&spf_flt, sizeof (spitf_async_flt));
39690Sstevel@tonic-gate 	aflt = &spf_flt.cmn_asyncflt;
39700Sstevel@tonic-gate 
39710Sstevel@tonic-gate 	/*
39720Sstevel@tonic-gate 	 * Scan each line in the cache to look for the one
39730Sstevel@tonic-gate 	 * with bad parity
39740Sstevel@tonic-gate 	 */
39750Sstevel@tonic-gate 	aflt->flt_addr = AFLT_INV_ADDR;
39760Sstevel@tonic-gate 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
39770Sstevel@tonic-gate 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
39780Sstevel@tonic-gate 
39790Sstevel@tonic-gate 	if (oafsr & P_AFSR_CP) {
39800Sstevel@tonic-gate 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
39810Sstevel@tonic-gate 		*cp_afsr |= oafsr;
39820Sstevel@tonic-gate 	}
39830Sstevel@tonic-gate 
39840Sstevel@tonic-gate 	/*
39850Sstevel@tonic-gate 	 * If we found a bad PA, update the state to indicate if it is
39860Sstevel@tonic-gate 	 * memory or I/O space.
39870Sstevel@tonic-gate 	 */
39880Sstevel@tonic-gate 	if (aflt->flt_addr != AFLT_INV_ADDR) {
39890Sstevel@tonic-gate 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
39900Sstevel@tonic-gate 			MMU_PAGESHIFT)) ? 1 : 0;
39910Sstevel@tonic-gate 	}
39920Sstevel@tonic-gate 
39930Sstevel@tonic-gate 	spf_flt.flt_type = (ushort_t)type;
39940Sstevel@tonic-gate 
39950Sstevel@tonic-gate 	aflt->flt_inst = CPU->cpu_id;
39960Sstevel@tonic-gate 	aflt->flt_class = CPU_FAULT;
39970Sstevel@tonic-gate 	aflt->flt_id = gethrtime_waitfree();
39980Sstevel@tonic-gate 	aflt->flt_status = afsr;
39990Sstevel@tonic-gate 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
40000Sstevel@tonic-gate 
40010Sstevel@tonic-gate 	/*
40020Sstevel@tonic-gate 	 * We have the bad line, flush that line and start
40030Sstevel@tonic-gate 	 * the memscrubber.
40040Sstevel@tonic-gate 	 */
40050Sstevel@tonic-gate 	if (spf_flt.flt_ec_lcnt > 0) {
40060Sstevel@tonic-gate 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
40070Sstevel@tonic-gate 			cpunodes[CPU->cpu_id].ecache_size);
40080Sstevel@tonic-gate 		read_all_memscrub = 1;
40090Sstevel@tonic-gate 		memscrub_run();
40100Sstevel@tonic-gate 	}
40110Sstevel@tonic-gate 
40120Sstevel@tonic-gate 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
40130Sstevel@tonic-gate 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
40140Sstevel@tonic-gate 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
40150Sstevel@tonic-gate 
40160Sstevel@tonic-gate 	return (oafsr);
40170Sstevel@tonic-gate }
40180Sstevel@tonic-gate 
40190Sstevel@tonic-gate static void
40200Sstevel@tonic-gate ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
40210Sstevel@tonic-gate {
40220Sstevel@tonic-gate 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
40230Sstevel@tonic-gate 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
40240Sstevel@tonic-gate 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
40250Sstevel@tonic-gate 	uint64_t ec_tag, paddr, oafsr;
40260Sstevel@tonic-gate 	ec_data_t ec_data[8];
40270Sstevel@tonic-gate 	int cpuid = CPU->cpu_id;
40280Sstevel@tonic-gate 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
40290Sstevel@tonic-gate 						ecache_associativity;
40300Sstevel@tonic-gate 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
40310Sstevel@tonic-gate 
40320Sstevel@tonic-gate 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
40330Sstevel@tonic-gate 			&oafsr, cpu_afsr);
40340Sstevel@tonic-gate 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
40350Sstevel@tonic-gate 						(index % ec_set_size);
40360Sstevel@tonic-gate 
40370Sstevel@tonic-gate 	/*
40380Sstevel@tonic-gate 	 * E$ tag state has good parity
40390Sstevel@tonic-gate 	 */
40400Sstevel@tonic-gate 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
40410Sstevel@tonic-gate 		if (afsr_ets & cpu_ec_parity) {
40420Sstevel@tonic-gate 			/*
40430Sstevel@tonic-gate 			 * E$ tag state bits indicate the line is clean,
40440Sstevel@tonic-gate 			 * invalidate the E$ tag and continue.
40450Sstevel@tonic-gate 			 */
40460Sstevel@tonic-gate 			if (!(state & cpu_ec_state_dirty)) {
40470Sstevel@tonic-gate 				/*
40480Sstevel@tonic-gate 				 * Zero the tag and mark the state invalid
40490Sstevel@tonic-gate 				 * with good parity for the tag.
40500Sstevel@tonic-gate 				 */
40510Sstevel@tonic-gate 				if (isus2i || isus2e)
40520Sstevel@tonic-gate 					write_hb_ec_tag_parity(index);
40530Sstevel@tonic-gate 				else
40540Sstevel@tonic-gate 					write_ec_tag_parity(index);
40550Sstevel@tonic-gate 
40560Sstevel@tonic-gate 				/* Sync with the dual tag */
40570Sstevel@tonic-gate 				flushecacheline(0,
40580Sstevel@tonic-gate 					cpunodes[CPU->cpu_id].ecache_size);
40590Sstevel@tonic-gate 				ec_ksp->tags_cleared.value.ul++;
40600Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr,
40610Sstevel@tonic-gate 					CPU_ECACHE_TAG_ERR, afsr);
40620Sstevel@tonic-gate 				return;
40630Sstevel@tonic-gate 			} else {
40640Sstevel@tonic-gate 				ecache_scrub_log(ec_data, ec_tag, paddr,
40650Sstevel@tonic-gate 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
40660Sstevel@tonic-gate 				cmn_err(CE_PANIC, " E$ tag address has bad"
40670Sstevel@tonic-gate 							" parity");
40680Sstevel@tonic-gate 			}
40690Sstevel@tonic-gate 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
40700Sstevel@tonic-gate 			/*
40710Sstevel@tonic-gate 			 * ETS is zero but ETP is set
40720Sstevel@tonic-gate 			 */
40730Sstevel@tonic-gate 			ecache_scrub_log(ec_data, ec_tag, paddr,
40740Sstevel@tonic-gate 				CPU_ECACHE_ETP_ETS_ERR, afsr);
40750Sstevel@tonic-gate 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
40760Sstevel@tonic-gate 				" AFSR.ETS is zero");
40770Sstevel@tonic-gate 		}
40780Sstevel@tonic-gate 	} else {
40790Sstevel@tonic-gate 		/*
40800Sstevel@tonic-gate 		 * E$ tag state bit has a bad parity
40810Sstevel@tonic-gate 		 */
40820Sstevel@tonic-gate 		ecache_scrub_log(ec_data, ec_tag, paddr,
40830Sstevel@tonic-gate 				CPU_ECACHE_STATE_ERR, afsr);
40840Sstevel@tonic-gate 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
40850Sstevel@tonic-gate 	}
40860Sstevel@tonic-gate }
40870Sstevel@tonic-gate 
40880Sstevel@tonic-gate static void
40890Sstevel@tonic-gate ecache_page_retire(void *arg)
40900Sstevel@tonic-gate {
40910Sstevel@tonic-gate 	uint64_t paddr = (uint64_t)arg;
40920Sstevel@tonic-gate 	page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT));
40930Sstevel@tonic-gate 
40940Sstevel@tonic-gate 	if (pp) {
40950Sstevel@tonic-gate 		page_settoxic(pp, PAGE_IS_FAULTY);
40960Sstevel@tonic-gate 		(void) page_retire(pp, PAGE_IS_TOXIC);
40970Sstevel@tonic-gate 	}
40980Sstevel@tonic-gate }
40990Sstevel@tonic-gate 
41000Sstevel@tonic-gate void
41010Sstevel@tonic-gate sticksync_slave(void)
41020Sstevel@tonic-gate {}
41030Sstevel@tonic-gate 
41040Sstevel@tonic-gate void
41050Sstevel@tonic-gate sticksync_master(void)
41060Sstevel@tonic-gate {}
41070Sstevel@tonic-gate 
41080Sstevel@tonic-gate /*ARGSUSED*/
41090Sstevel@tonic-gate void
41100Sstevel@tonic-gate cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
41110Sstevel@tonic-gate {}
41120Sstevel@tonic-gate 
41130Sstevel@tonic-gate void
41140Sstevel@tonic-gate cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
41150Sstevel@tonic-gate {
41160Sstevel@tonic-gate 	int status;
41170Sstevel@tonic-gate 	ddi_fm_error_t de;
41180Sstevel@tonic-gate 
41190Sstevel@tonic-gate 	bzero(&de, sizeof (ddi_fm_error_t));
41200Sstevel@tonic-gate 
41210Sstevel@tonic-gate 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
41220Sstevel@tonic-gate 	    FM_ENA_FMT1);
41230Sstevel@tonic-gate 	de.fme_flag = expected;
41240Sstevel@tonic-gate 	de.fme_bus_specific = (void *)aflt->flt_addr;
41250Sstevel@tonic-gate 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
41260Sstevel@tonic-gate 
41270Sstevel@tonic-gate 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
41280Sstevel@tonic-gate 		aflt->flt_panic = 1;
41290Sstevel@tonic-gate }
41300Sstevel@tonic-gate 
41310Sstevel@tonic-gate /*ARGSUSED*/
41320Sstevel@tonic-gate void
41330Sstevel@tonic-gate cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
41340Sstevel@tonic-gate     errorq_t *eqp, uint_t flag)
41350Sstevel@tonic-gate {
41360Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)payload;
41370Sstevel@tonic-gate 
41380Sstevel@tonic-gate 	aflt->flt_erpt_class = error_class;
41390Sstevel@tonic-gate 	errorq_dispatch(eqp, payload, payload_sz, flag);
41400Sstevel@tonic-gate }
41410Sstevel@tonic-gate 
41420Sstevel@tonic-gate #define	MAX_SIMM	8
41430Sstevel@tonic-gate 
41440Sstevel@tonic-gate struct ce_info {
41450Sstevel@tonic-gate 	char    name[UNUM_NAMLEN];
41460Sstevel@tonic-gate 	uint64_t intermittent_total;
41470Sstevel@tonic-gate 	uint64_t persistent_total;
41480Sstevel@tonic-gate 	uint64_t sticky_total;
41490Sstevel@tonic-gate 	unsigned short leaky_bucket_cnt;
41500Sstevel@tonic-gate };
41510Sstevel@tonic-gate 
41520Sstevel@tonic-gate /*
41530Sstevel@tonic-gate  * Separately-defined structure for use in reporting the ce_info
41540Sstevel@tonic-gate  * to SunVTS without exposing the internal layout and implementation
41550Sstevel@tonic-gate  * of struct ce_info.
41560Sstevel@tonic-gate  */
41570Sstevel@tonic-gate static struct ecc_error_info ecc_error_info_data = {
41580Sstevel@tonic-gate 	{ "version", KSTAT_DATA_UINT32 },
41590Sstevel@tonic-gate 	{ "maxcount", KSTAT_DATA_UINT32 },
41600Sstevel@tonic-gate 	{ "count", KSTAT_DATA_UINT32 }
41610Sstevel@tonic-gate };
41620Sstevel@tonic-gate static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
41630Sstevel@tonic-gate     sizeof (struct kstat_named);
41640Sstevel@tonic-gate 
41650Sstevel@tonic-gate #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
41660Sstevel@tonic-gate #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
41670Sstevel@tonic-gate #endif
41680Sstevel@tonic-gate 
41690Sstevel@tonic-gate struct ce_info  *mem_ce_simm = NULL;
41700Sstevel@tonic-gate size_t mem_ce_simm_size = 0;
41710Sstevel@tonic-gate 
41720Sstevel@tonic-gate /*
41730Sstevel@tonic-gate  * Default values for the number of CE's allowed per interval.
41740Sstevel@tonic-gate  * Interval is defined in minutes
41750Sstevel@tonic-gate  * SOFTERR_MIN_TIMEOUT is defined in microseconds
41760Sstevel@tonic-gate  */
41770Sstevel@tonic-gate #define	SOFTERR_LIMIT_DEFAULT		2
41780Sstevel@tonic-gate #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
41790Sstevel@tonic-gate #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
41800Sstevel@tonic-gate #define	TIMEOUT_NONE			((timeout_id_t)0)
41810Sstevel@tonic-gate #define	TIMEOUT_SET			((timeout_id_t)1)
41820Sstevel@tonic-gate 
41830Sstevel@tonic-gate /*
41840Sstevel@tonic-gate  * timeout identifer for leaky_bucket
41850Sstevel@tonic-gate  */
41860Sstevel@tonic-gate static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
41870Sstevel@tonic-gate 
41880Sstevel@tonic-gate /*
41890Sstevel@tonic-gate  * Tunables for maximum number of allowed CE's in a given time
41900Sstevel@tonic-gate  */
41910Sstevel@tonic-gate int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
41920Sstevel@tonic-gate int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
41930Sstevel@tonic-gate 
41940Sstevel@tonic-gate void
41950Sstevel@tonic-gate cpu_mp_init(void)
41960Sstevel@tonic-gate {
41970Sstevel@tonic-gate 	size_t size = cpu_aflt_size();
41980Sstevel@tonic-gate 	size_t i;
41990Sstevel@tonic-gate 	kstat_t *ksp;
42000Sstevel@tonic-gate 
42010Sstevel@tonic-gate 	/*
42020Sstevel@tonic-gate 	 * Initialize the CE error handling buffers.
42030Sstevel@tonic-gate 	 */
42040Sstevel@tonic-gate 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
42050Sstevel@tonic-gate 	size = sizeof (struct ce_info) * mem_ce_simm_size;
42060Sstevel@tonic-gate 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
42070Sstevel@tonic-gate 
42080Sstevel@tonic-gate 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
42090Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
42100Sstevel@tonic-gate 	if (ksp != NULL) {
42110Sstevel@tonic-gate 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
42120Sstevel@tonic-gate 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
42130Sstevel@tonic-gate 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
42140Sstevel@tonic-gate 		ecc_error_info_data.count.value.ui32 = 0;
42150Sstevel@tonic-gate 		kstat_install(ksp);
42160Sstevel@tonic-gate 	}
42170Sstevel@tonic-gate 
42180Sstevel@tonic-gate 	for (i = 0; i < mem_ce_simm_size; i++) {
42190Sstevel@tonic-gate 		struct kstat_ecc_mm_info *kceip;
42200Sstevel@tonic-gate 
42210Sstevel@tonic-gate 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
42220Sstevel@tonic-gate 		    KM_SLEEP);
42230Sstevel@tonic-gate 		ksp = kstat_create("mm", i, "ecc-info", "misc",
42240Sstevel@tonic-gate 		    KSTAT_TYPE_NAMED,
42250Sstevel@tonic-gate 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
42260Sstevel@tonic-gate 		    KSTAT_FLAG_VIRTUAL);
42270Sstevel@tonic-gate 		if (ksp != NULL) {
42280Sstevel@tonic-gate 			/*
42290Sstevel@tonic-gate 			 * Re-declare ks_data_size to include room for the
42300Sstevel@tonic-gate 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
42310Sstevel@tonic-gate 			 * set.
42320Sstevel@tonic-gate 			 */
42330Sstevel@tonic-gate 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
42340Sstevel@tonic-gate 			    KSTAT_CE_UNUM_NAMLEN;
42350Sstevel@tonic-gate 			ksp->ks_data = kceip;
42360Sstevel@tonic-gate 			kstat_named_init(&kceip->name,
42370Sstevel@tonic-gate 			    "name", KSTAT_DATA_STRING);
42380Sstevel@tonic-gate 			kstat_named_init(&kceip->intermittent_total,
42390Sstevel@tonic-gate 			    "intermittent_total", KSTAT_DATA_UINT64);
42400Sstevel@tonic-gate 			kstat_named_init(&kceip->persistent_total,
42410Sstevel@tonic-gate 			    "persistent_total", KSTAT_DATA_UINT64);
42420Sstevel@tonic-gate 			kstat_named_init(&kceip->sticky_total,
42430Sstevel@tonic-gate 			    "sticky_total", KSTAT_DATA_UINT64);
42440Sstevel@tonic-gate 			/*
42450Sstevel@tonic-gate 			 * Use the default snapshot routine as it knows how to
42460Sstevel@tonic-gate 			 * deal with named kstats with long strings.
42470Sstevel@tonic-gate 			 */
42480Sstevel@tonic-gate 			ksp->ks_update = ecc_kstat_update;
42490Sstevel@tonic-gate 			kstat_install(ksp);
42500Sstevel@tonic-gate 		} else {
42510Sstevel@tonic-gate 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
42520Sstevel@tonic-gate 		}
42530Sstevel@tonic-gate 	}
42540Sstevel@tonic-gate }
42550Sstevel@tonic-gate 
42560Sstevel@tonic-gate /*ARGSUSED*/
42570Sstevel@tonic-gate static void
42580Sstevel@tonic-gate leaky_bucket_timeout(void *arg)
42590Sstevel@tonic-gate {
42600Sstevel@tonic-gate 	int i;
42610Sstevel@tonic-gate 	struct ce_info *psimm = mem_ce_simm;
42620Sstevel@tonic-gate 
42630Sstevel@tonic-gate 	for (i = 0; i < mem_ce_simm_size; i++) {
42640Sstevel@tonic-gate 		if (psimm[i].leaky_bucket_cnt > 0)
42650Sstevel@tonic-gate 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
42660Sstevel@tonic-gate 	}
42670Sstevel@tonic-gate 	add_leaky_bucket_timeout();
42680Sstevel@tonic-gate }
42690Sstevel@tonic-gate 
42700Sstevel@tonic-gate static void
42710Sstevel@tonic-gate add_leaky_bucket_timeout(void)
42720Sstevel@tonic-gate {
42730Sstevel@tonic-gate 	long timeout_in_microsecs;
42740Sstevel@tonic-gate 
42750Sstevel@tonic-gate 	/*
42760Sstevel@tonic-gate 	 * create timeout for next leak.
42770Sstevel@tonic-gate 	 *
42780Sstevel@tonic-gate 	 * The timeout interval is calculated as follows
42790Sstevel@tonic-gate 	 *
42800Sstevel@tonic-gate 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
42810Sstevel@tonic-gate 	 *
42820Sstevel@tonic-gate 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
42830Sstevel@tonic-gate 	 * in a minute), then multiply this by MICROSEC to get the interval
42840Sstevel@tonic-gate 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
42850Sstevel@tonic-gate 	 * the timeout interval is accurate to within a few microseconds.
42860Sstevel@tonic-gate 	 */
42870Sstevel@tonic-gate 
42880Sstevel@tonic-gate 	if (ecc_softerr_limit <= 0)
42890Sstevel@tonic-gate 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
42900Sstevel@tonic-gate 	if (ecc_softerr_interval <= 0)
42910Sstevel@tonic-gate 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
42920Sstevel@tonic-gate 
42930Sstevel@tonic-gate 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
42940Sstevel@tonic-gate 	    ecc_softerr_limit;
42950Sstevel@tonic-gate 
42960Sstevel@tonic-gate 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
42970Sstevel@tonic-gate 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
42980Sstevel@tonic-gate 
42990Sstevel@tonic-gate 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
43000Sstevel@tonic-gate 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
43010Sstevel@tonic-gate }
43020Sstevel@tonic-gate 
43030Sstevel@tonic-gate /*
43040Sstevel@tonic-gate  * Legacy Correctable ECC Error Hash
43050Sstevel@tonic-gate  *
43060Sstevel@tonic-gate  * All of the code below this comment is used to implement a legacy array
43070Sstevel@tonic-gate  * which counted intermittent, persistent, and sticky CE errors by unum,
43080Sstevel@tonic-gate  * and then was later extended to publish the data as a kstat for SunVTS.
43090Sstevel@tonic-gate  * All of this code is replaced by FMA, and remains here until such time
43100Sstevel@tonic-gate  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
43110Sstevel@tonic-gate  *
43120Sstevel@tonic-gate  * Errors are saved in three buckets per-unum:
43130Sstevel@tonic-gate  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
43140Sstevel@tonic-gate  *     This could represent a problem, and is immediately printed out.
43150Sstevel@tonic-gate  * (2) persistent - was successfully scrubbed
43160Sstevel@tonic-gate  *     These errors use the leaky bucket algorithm to determine
43170Sstevel@tonic-gate  *     if there is a serious problem.
 * (3) intermittent - may have originated from the cpu or upa/safari bus,
 *     and does not necessarily indicate any problem with the dimm itself,
 *     but is critical information for debugging new hardware.
43210Sstevel@tonic-gate  *     Because we do not know if it came from the dimm, it would be
43220Sstevel@tonic-gate  *     inappropriate to include these in the leaky bucket counts.
43230Sstevel@tonic-gate  *
43240Sstevel@tonic-gate  * If the E$ line was modified before the scrub operation began, then the
43250Sstevel@tonic-gate  * displacement flush at the beginning of scrubphys() will cause the modified
43260Sstevel@tonic-gate  * line to be written out, which will clean up the CE.  Then, any subsequent
43270Sstevel@tonic-gate  * read will not cause an error, which will cause persistent errors to be
43280Sstevel@tonic-gate  * identified as intermittent.
43290Sstevel@tonic-gate  *
43300Sstevel@tonic-gate  * If a DIMM is going bad, it will produce true persistents as well as
43310Sstevel@tonic-gate  * false intermittents, so these intermittents can be safely ignored.
43320Sstevel@tonic-gate  *
43330Sstevel@tonic-gate  * If the error count is excessive for a DIMM, this function will return
43340Sstevel@tonic-gate  * PAGE_IS_FAILING, and the CPU module may then decide to remove that page
43350Sstevel@tonic-gate  * from use.
43360Sstevel@tonic-gate  */
/*
 * Count a correctable error against the per-unum table described above.
 * `status' carries the ECC_STICKY/ECC_PERSISTENT/ECC_INTERMITTENT
 * classification; `unum' (length `len') names the memory module.
 * Returns PAGE_IS_FAILING when the error is sticky or when the leaky
 * bucket count exceeds ecc_softerr_limit; PAGE_IS_OK otherwise.
 * Runs lockless; shared counters are updated with atomics where
 * concurrent updates matter.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PAGE_IS_OK;

	ASSERT(psimm != NULL);

	/* Nothing to record without a unum or a recognized CE class. */
	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout on first use; the casptr
	 * ensures exactly one racing caller arms it.
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.  The name is copied before any
			 * counter goes non-zero; ecc_kstat_update()
			 * depends on that ordering.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			/* New unum added: bump the table-entry count kstat. */
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					/*
					 * Back out the increment that pushed
					 * the bucket over the limit so the
					 * threshold message doesn't repeat
					 * on every subsequent error.
					 */
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PAGE_IS_FAILING;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	/* Loop ran off the end without matching: fixed-size table is full. */
	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}
44400Sstevel@tonic-gate 
44410Sstevel@tonic-gate /*
44420Sstevel@tonic-gate  * Function to support counting of IO detected CEs.
44430Sstevel@tonic-gate  */
44440Sstevel@tonic-gate void
44450Sstevel@tonic-gate cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
44460Sstevel@tonic-gate {
44470Sstevel@tonic-gate 	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
44480Sstevel@tonic-gate 	    automatic_page_removal) {
44490Sstevel@tonic-gate 		page_t *pp = page_numtopp_nolock((pfn_t)
44500Sstevel@tonic-gate 		    (ecc->flt_addr >> MMU_PAGESHIFT));
44510Sstevel@tonic-gate 
44520Sstevel@tonic-gate 		if (pp) {
44530Sstevel@tonic-gate 			page_settoxic(pp, PAGE_IS_FAULTY);
44540Sstevel@tonic-gate 			(void) page_retire(pp, PAGE_IS_FAILING);
44550Sstevel@tonic-gate 		}
44560Sstevel@tonic-gate 	}
44570Sstevel@tonic-gate }
44580Sstevel@tonic-gate 
44590Sstevel@tonic-gate static int
44600Sstevel@tonic-gate ecc_kstat_update(kstat_t *ksp, int rw)
44610Sstevel@tonic-gate {
44620Sstevel@tonic-gate 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
44630Sstevel@tonic-gate 	struct ce_info *ceip = mem_ce_simm;
44640Sstevel@tonic-gate 	int i = ksp->ks_instance;
44650Sstevel@tonic-gate 
44660Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
44670Sstevel@tonic-gate 		return (EACCES);
44680Sstevel@tonic-gate 
44690Sstevel@tonic-gate 	ASSERT(ksp->ks_data != NULL);
44700Sstevel@tonic-gate 	ASSERT(i < mem_ce_simm_size && i >= 0);
44710Sstevel@tonic-gate 
44720Sstevel@tonic-gate 	/*
44730Sstevel@tonic-gate 	 * Since we're not using locks, make sure that we don't get partial
44740Sstevel@tonic-gate 	 * data. The name is always copied before the counters are incremented
44750Sstevel@tonic-gate 	 * so only do this update routine if at least one of the counters is
44760Sstevel@tonic-gate 	 * non-zero, which ensures that ce_count_unum() is done, and the
44770Sstevel@tonic-gate 	 * string is fully copied.
44780Sstevel@tonic-gate 	 */
44790Sstevel@tonic-gate 	if (ceip[i].intermittent_total == 0 &&
44800Sstevel@tonic-gate 	    ceip[i].persistent_total == 0 &&
44810Sstevel@tonic-gate 	    ceip[i].sticky_total == 0) {
44820Sstevel@tonic-gate 		/*
44830Sstevel@tonic-gate 		 * Uninitialized or partially initialized. Ignore.
44840Sstevel@tonic-gate 		 * The ks_data buffer was allocated via kmem_zalloc,
44850Sstevel@tonic-gate 		 * so no need to bzero it.
44860Sstevel@tonic-gate 		 */
44870Sstevel@tonic-gate 		return (0);
44880Sstevel@tonic-gate 	}
44890Sstevel@tonic-gate 
44900Sstevel@tonic-gate 	kstat_named_setstr(&kceip->name, ceip[i].name);
44910Sstevel@tonic-gate 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
44920Sstevel@tonic-gate 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
44930Sstevel@tonic-gate 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
44940Sstevel@tonic-gate 
44950Sstevel@tonic-gate 	return (0);
44960Sstevel@tonic-gate }
44970Sstevel@tonic-gate 
44980Sstevel@tonic-gate #define	VIS_BLOCKSIZE		64
44990Sstevel@tonic-gate 
45000Sstevel@tonic-gate int
45010Sstevel@tonic-gate dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
45020Sstevel@tonic-gate {
45030Sstevel@tonic-gate 	int ret, watched;
45040Sstevel@tonic-gate 
45050Sstevel@tonic-gate 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
45060Sstevel@tonic-gate 	ret = dtrace_blksuword32(addr, data, 0);
45070Sstevel@tonic-gate 	if (watched)
45080Sstevel@tonic-gate 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
45090Sstevel@tonic-gate 
45100Sstevel@tonic-gate 	return (ret);
45110Sstevel@tonic-gate }
45120Sstevel@tonic-gate 
/*
 * Hook invoked when a CPU enters the faulted state.  No action is
 * required on this CPU type; the empty stub satisfies the common
 * CPU-module interface.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
45180Sstevel@tonic-gate 
/*
 * Hook invoked when a CPU leaves the faulted state.  No action is
 * required on this CPU type; the empty stub satisfies the common
 * CPU-module interface.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
45240Sstevel@tonic-gate 
/*
 * Bit masks, indexed by TTE page-size code, of the large page sizes
 * disabled for ISM/shared mappings and for normal mappings on this
 * MMU; handed out by mmu_large_pages_disabled().
 */
static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
	(1 << TTE32M) | (1 << TTE256M));
static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
45280Sstevel@tonic-gate 
45290Sstevel@tonic-gate /*
45300Sstevel@tonic-gate  * The function returns the US_II mmu-specific values for the
45310Sstevel@tonic-gate  * hat's disable_large_pages and disable_ism_large_pages variables.
45320Sstevel@tonic-gate  */
45330Sstevel@tonic-gate int
45340Sstevel@tonic-gate mmu_large_pages_disabled(uint_t flag)
45350Sstevel@tonic-gate {
45360Sstevel@tonic-gate 	int pages_disable = 0;
45370Sstevel@tonic-gate 
45380Sstevel@tonic-gate 	if (flag == HAT_LOAD) {
45390Sstevel@tonic-gate 		pages_disable = mmu_disable_large_pages;
45400Sstevel@tonic-gate 	} else if (flag == HAT_LOAD_SHARE) {
45410Sstevel@tonic-gate 		pages_disable = mmu_disable_ism_large_pages;
45420Sstevel@tonic-gate 	}
45430Sstevel@tonic-gate 	return (pages_disable);
45440Sstevel@tonic-gate }
45450Sstevel@tonic-gate 
/*
 * MMU-specific kernel page-size initialization hook.  Nothing to set
 * up on this MMU; the empty stub satisfies the common hat interface.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}
45510Sstevel@tonic-gate 
45520Sstevel@tonic-gate size_t
45530Sstevel@tonic-gate mmu_get_kernel_lpsize(size_t lpsize)
45540Sstevel@tonic-gate {
45550Sstevel@tonic-gate 	uint_t tte;
45560Sstevel@tonic-gate 
45570Sstevel@tonic-gate 	if (lpsize == 0) {
45580Sstevel@tonic-gate 		/* no setting for segkmem_lpsize in /etc/system: use default */
45590Sstevel@tonic-gate 		return (MMU_PAGESIZE4M);
45600Sstevel@tonic-gate 	}
45610Sstevel@tonic-gate 
45620Sstevel@tonic-gate 	for (tte = TTE8K; tte <= TTE4M; tte++) {
45630Sstevel@tonic-gate 		if (lpsize == TTEBYTES(tte))
45640Sstevel@tonic-gate 			return (lpsize);
45650Sstevel@tonic-gate 	}
45660Sstevel@tonic-gate 
45670Sstevel@tonic-gate 	return (TTEBYTES(TTE8K));
45680Sstevel@tonic-gate }
4569